/*
                                  __
           _____ __   __ _  __ _ / _|_ __ __ _ _ __  ____
          |_  / '_ \ / _` |/ _` | |_| '__/ _` | '_ \|_  /
           / /| |_) | (_| | (_| |  _| | | (_| | | | |/ /
          /___| .__/ \__,_|\__, |_| |_|  \__,_|_| |_/___|
              |_|             |_|
         Swiss army knife for backup and disaster recovery
Like 7z or RAR on steroids,with deduplicated "snapshots" (versions)
Conceptually similar to Mac time machine, but much more efficiently
Keeps backup always-to-always, no need to ever prune (CryptoLocker)
Easily handles millions of files and TBs of data, non-latin support
Cloud backups with full encryption, minimal data transfer/bandwidth
Data integrity check CRC32+XXHASH|SHA-1|SHA-2|SHA-3|MD5|XXH3|BLAKE3
Thorough data verification, multithread support (real world 1GB+/s)
Specific zfs handling functions,full multiplatform interoperability
Particularly suitable for minimal space storage of virtual machines

Windows, FreeBSD, OpenBSD, Linux, MacOS, Solaris, OmniOS and others

WWW: https://github.com/fcorbelli/zpaqfranz

FACT: the best software for backup/disaster recovery you have ever seen
      (just joking)

           Provided as-is, with no warranty whatsoever,
                     by Franco Corbelli
                  franco@francocorbelli.com


MIT License
Copyright (c) 2021-2024 Franco Corbelli

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
*/

/// Release identification, kept as plain string literals.
/// The date is written out by hand instead of being taken from the
/// compiler's __DATE__ (see the note on the line itself).
/// NOTE(review): the Debian remark presumably refers to reproducible-build
/// requirements - confirm.
#define ZPAQ_VERSION "60.10w"
#define ZPAQ_DATE "(2024-12-15)"  // cannot use __DATE__ on Debian!

/// Two-step stringification helpers: STR() quotes its argument as written,
/// XSTR() lets the argument be macro-expanded first and then quotes it.
#define STR(a) #a
#define XSTR(a) STR(a)

/// Optional align for malloc (sparc64, HPPA): building with -DALIGNMALLOC
/// sets MALLOC_ALIGN to 4.
#if defined(ALIGNMALLOC)
	#define MALLOC_ALIGN 4
#endif

/// TEXT_ALIGN is "." followed by the value of MALLOC_ALIGN when that macro
/// is defined, and the empty string when it is not.
#if !defined(MALLOC_ALIGN)
	#define TEXT_ALIGN ""
#else
	#define TEXT_ALIGN "." XSTR(MALLOC_ALIGN)
#endif

/// "Automagic" build configuration (well, sort of), keyed on _WIN32.
///  - Windows:     the switches SOLARIS, ANCIENT, BIG, ESX, ALIGNMALLOC and
///                 unix are removed; GUI is always defined
///  - non-Windows: the switches HWBLAKE3, HWSHA1, SERVER and GUI are removed;
///                 unix is always defined
/// The #undef placed right before each #define discards any earlier
/// definition of the same name, so the #define is never a redefinition.
#if defined(_WIN32)
	#undef  unix
	#undef  ALIGNMALLOC
	#undef  ESX
	#undef  BIG
	#undef  ANCIENT
	#undef  SOLARIS
	#undef  GUI
	#define GUI
#else
	#undef  GUI
	#undef  SERVER
	#undef  HWSHA1
	#undef  HWBLAKE3
	#undef  unix
	#define unix
#endif

/// SHA hardware-acceleration switches, decided by Windows bitness:
///  - 64 bit: HWSHA2 is switched on automatically, unless HWSHA1 was
///            requested, in which case HWSHA2 is switched off
///  - 32 bit: both HWSHA1 and HWSHA2 are switched off
/// Nothing is changed by this section on non-Windows builds.
#if defined(_WIN64)
	#undef HWSHA2
	#if !defined(HWSHA1)
		#define HWSHA2
	#endif
#elif defined(_WIN32)
	#undef HWSHA1
	#undef HWSHA2
#endif

/// SFX tag string: empty outside Windows, otherwise the 64-bit or the
/// 32-bit variant depending on whether _WIN64 is defined.
#if !defined(_WIN32) && !defined(_WIN64)
	#define ZSFX_VERSION ""
#elif defined(_WIN64)
	#define ZSFX_VERSION "SFX64 v55.1,"
#else
	#define ZSFX_VERSION "-SFX32 v55.1,"
#endif

/// Feature tags: each TEXT_* macro is a short label when the matching
/// build switch is defined and the empty string when it is not.

/// GUI switch
#ifndef GUI
	#define TEXT_GUI ""
#else
	#define TEXT_GUI "-GUI,"
#endif

/// HWBLAKE3 switch
#ifndef HWBLAKE3
	#define TEXT_HWBLAKE3 ""
#else
	#define TEXT_HWBLAKE3 "BLAKE3,"
#endif

/// HWSHA1 switch
#ifndef HWSHA1
	#define TEXT_HWSHA1 ""
#else
	#define TEXT_HWSHA1 "SHA1,"
#endif

/// HWSHA2 switch
#ifndef HWSHA2
	#define TEXT_HWSHA2 ""
#else
	#define TEXT_HWSHA2 "SHA1/2,"
#endif

/// TEXT_HWPRE is ",HW " when at least one hardware-acceleration switch
/// (HWBLAKE3, HWSHA1, HWSHA2) is defined, and the empty string otherwise.
/// The BLAKE3 switch is spelled HWBLAKE3 everywhere else in this file, so
/// that is the name tested here: a check for "HWBLAKE" never matches a
/// build that enables only HWBLAKE3.
/// NOTE(review): presumably this prefix is printed right before the
/// TEXT_HWBLAKE3 / TEXT_HWSHA1 / TEXT_HWSHA2 tags - confirm at the point of use.
#if defined(HWBLAKE3) || defined(HWSHA1) || defined(HWSHA2)
	#define TEXT_HWPRE ",HW "
#else
	#define TEXT_HWPRE ""
#endif

/// IP version tag: "6," when the IPV6 switch is defined, "4," otherwise.
#if !defined(IPV6)
	#define TEXT_IPV "4,"
#else
	#define TEXT_IPV "6,"
#endif

/// Numeric constant 100,000,000.
/// NOTE(review): presumably a size threshold above which a file is treated
/// as "large" - the unit and the users are not visible here, confirm.
#define LARGEFILE 100000000

/// Byte-order constants, defined here because not every compiler provides them.
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN    4321

/// Byte order is chosen by the BIG build switch alone: little endian
/// (tag "-L") unless BIG is defined, big endian (tag "-B") when it is.
#if !defined(BIG)
	#define __BYTE_ORDER __LITTLE_ENDIAN
	#define TEXT_BIG "-L"
#else
	#define __BYTE_ORDER __BIG_ENDIAN
	#define TEXT_BIG "-B"
#endif

/// ESX build (-DESX): defines unix and ANCIENT as 1, blanks the endianness
/// tag, forces the IP tag to "4," and removes the IPV6 switch.
/// unix and ANCIENT are #undef'd first: on non-Windows builds unix has
/// already been defined with an empty body earlier in this file, and
/// defining it again with a different body is an invalid macro redefinition
/// that compilers diagnose.
#ifdef ESX
	#undef  unix
	#define unix 	1
	#undef  ANCIENT
	#define ANCIENT 1
	#undef  TEXT_BIG
	#define TEXT_BIG ""
	#undef  TEXT_IPV
	#define TEXT_IPV "4,"
	#undef  IPV6
#endif

/// SERVER is kept only on Windows builds; everywhere else the switch is
/// removed. TEXT_SERVER is "-cloud" while SERVER is active, empty otherwise.
#if defined(_WIN32) && defined(SERVER)
	#define TEXT_SERVER "-cloud"
#else
	#undef  SERVER
	#define TEXT_SERVER ""
#endif

/// NAS build (-DNAS): defines ANCIENT, removes every optional switch
/// (HWSHA1, HWSHA2, HWBLAKE3, SOLARIS, BIG, DEBUG, ESX, ALIGNMALLOC, SERVER,
/// GUI) and defines unix as 1.
/// ANCIENT is #undef'd before being defined, exactly as unix is below, so
/// that an earlier "ANCIENT 1" (from -DANCIENT or from the ESX section) does
/// not turn this #define into an invalid macro redefinition.
/// NOTE(review): the TEXT_* tags, MALLOC_ALIGN and __BYTE_ORDER defined
/// earlier in this file were computed before these #undefs and are not
/// recomputed here - confirm that is intended.
#ifdef NAS
	#undef ANCIENT
	#define ANCIENT
	#undef HWSHA1
	#undef HWSHA2
	#undef HWBLAKE3
	#undef SOLARIS
	#undef BIG
	#undef DEBUG
	#undef ESX
	#undef ALIGNMALLOC
	#undef SERVER
	#undef GUI
	#undef unix
	#define unix 1
#endif

/// The value 19800101000000, built as 1980*10^10 + 1*10^8 + 1*10^6.
/// NOTE(review): presumably 1980-01-01 00:00:00 in a decimal YYYYMMDDHHMMSS
/// encoding - confirm against the code that compares dates with it.
/// The whole sum is parenthesized so the macro behaves as a single value
/// inside larger expressions (e.g. 2*DATE_1980, -DATE_1980, x%DATE_1980).
#define DATE_1980 (1980*10000000000LL+1*100000000LL+1*1000000)


/*
This is zpaqfranz, a patched  but (maybe :) compatible fork of ZPAQ version 7.15
(http://mattmahoney.net/dc/zpaq.html)

Old version in FreeBSD ports archivers/paq (v 6.57 of 2014),
Debian (7.15 of 2016) et al.

From branch 51 onward, all source code has been merged into a single .cpp file,
aiming to make it as easy as possible to compile on "strange" systems
(NAS, vSphere etc.), because no make is needed anymore.

So please be patient if the source is not linear: in exchange, updating and compiling are now trivial.

The source is composed of the fusion of different software
from different authors, therefore there is no uniform style of programming.

I have made a number of efforts to maintain compatibility with the unmodified version (7.15)
and with older versions of C++, even at the cost of slow or inelegant workarounds,
while producing as few warnings as possible.

So don't be surprised if it looks like what in Italy
we call "zibaldone" or in Emilia-Romagna "mappazzone".
As Kirk McKusick once said: "nobody has offered to pay me the $25K to have me do it"
:)

GitHub links

SFX modules (Windows)
https://github.com/fcorbelli/zpaqfranz/tree/main/ZSFX
https://github.com/fcorbelli/zsfx

Embedded AUTOTEST file
https://github.com/fcorbelli/zpaqfranz/tree/main/AUTOTEST

Windows stuff (assembly and object code for HW acceleration)
https://github.com/fcorbelli/zpaqfranz/tree/main/WINDOWS

NON-Windows stuff
https://github.com/fcorbelli/zpaqfranz/tree/main/NONWINDOWS

Manual (pod)
https://github.com/fcorbelli/zpaqfranz/tree/main/man

Wiki
https://github.com/fcorbelli/zpaqfranz/wiki



Portions of software by other authors, mentioned later, are included.
As far as I know this is allowed by the licenses.

**** I apologize if I have unintentionally violated any rule ****
****    Please report and I will fix as soon as possible     ****


        _      _____ _____ ______ _   _  _____ ______  _____ 
       | |    |_   _/ ____|  ____| \ | |/ ____|  ____|/ ____|
       | |      | || |    | |__  |  \| | (___ | |__  | (___  
       | |      | || |    |  __| | . ` |\___ \|  __|  \___ \ 
       | |____ _| || |____| |____| |\  |____) | |____ ____) |
       |______|_____\_____|______|_| \_|_____/|______|_____/ 
                                                       
Credits and copyrights and licenses and links and internal bookmarks

 0 [Public domain]                zpaq http://mattmahoney.net/dc/zpaq.html
	This software is provided as-is, with no warranty.
	I, Matt Mahoney, release this software into
	the public domain.   This applies worldwide.
	In some countries this may not be legally possible; if so:
	I grant anyone the right to use this software for any purpose,
	without any conditions, unless such conditions are required by law.

 1 [Public domain]                zpaq.AES from libtomcrypt by Tom St Denis
 /// LICENSE_START.1
 /// LICENSE_END.1
	The LibTom license
	This is free and unencumbered software released into the public domain.
	Anyone is free to copy, modify, publish, use, compile, sell, or
	distribute this software, either in source code form or as a compiled
	binary, for any purpose, commercial or non-commercial, and by any
	means.
	In jurisdictions that recognize copyright laws, the author or authors
	of this software dedicate any and all copyright interest in the
	software to the public domain. We make this dedication for the benefit
	of the public at large and to the detriment of our heirs and
	successors. We intend this dedication to be an overt act of
	relinquishment in perpetuity of all present and future rights to this
	software under copyright law.
	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
	IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
	OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
	ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
	OTHER DEALINGS IN THE SOFTWARE.
	For more information, please refer to <http://unlicense.org/>

 2 [Public domain]                zpaq.salsa20 by D. J. Bernstein
 /// LICENSE_START.2
 /// LICENSE_END.2
	From the reference
	salsa20-ref.c version 20051118
	D. J. Bernstein
	Public domain.

 3 [Public domain]                unzpaq206.cpp by Matt Mahoney
 /// LICENSE_START.3
 /// LICENSE_END.3
	This software is provided as-is, with no warranty.
	I, Matt Mahoney, release this software into
	the public domain.   This applies worldwide.
	In some countries this may not be legally possible; if so:
	I grant anyone the right to use this software for any purpose,
	without any conditions, unless such conditions are required by law.

 4 [Public domain]                zpaq.Include mod by data man and reg2s patch from encode.su forum
    Public forum                  https://encode.su/threads/456-zpaq-updates

 5 [Public domain]                Sha1Opt.asm and 7zAsm.asm by Igor Pavlov
 /// LICENSE_START.5
 /// LICENSE_END.5
    *** NOTE: those asm are used only on Windows ***
    https://sourceforge.net/p/sevenzip/discussion/45797/thread/7d394aca49/?limit=25#521d
 	Hello, I am the developer of a little zpaq's fork (zpaqfranz)	(...)
	I integrated two of yours source code into mine	(...)
	Even if the performance increases very little (maybe 10%), can I use it?
	I think it is right to ask this explicitly before releasing the new version or zpaqfranz
	Thank you
	Franco Corbelli
	(Igor Pavlov, 2022-07-22)
		Yes, you can use any public domain code from 7-zip in any project.
	
 6 [MIT license]                  zpaq.Code from libdivsufsort 2.0 (C) Yuta Mori, 2003-2008
 /// LICENSE_START.6
 /// LICENSE_END.6
 	The MIT License (MIT)
	Copyright (c) 2003-2008 Yuta Mori All rights reserved.
	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights
	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	copies of the Software, and to permit persons to whom the Software is
	furnished to do so, subject to the following conditions:
	The above copyright notice and this permission notice shall be included in all
	copies or substantial portions of the Software.
	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.

 7 [MIT License]                  Embedded Artistry (memory-aligned malloc)           https://github.com/embeddedartistry
 /// LICENSE_START.7
 /// LICENSE_END.7
	MIT License
	Copyright (c) 2017 Embedded Artistry
	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights
	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	copies of the Software, and to permit persons to whom the Software is
	furnished to do so, subject to the following conditions:
	The above copyright notice and this permission notice shall be included in all
	copies or substantial portions of the Software.
	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.

 8 [MIT License]                  Nilsimsa implementation by Sepehr Laal              https://github.com/3p3r/nilsimsa-lite/blob/master/nilsimsa.c
 /// LICENSE_START.8
 /// LICENSE_END.8
	MIT License
	Copyright (c) 2017 Sepehr Laal
	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights
	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	copies of the Software, and to permit persons to whom the Software is
	furnished to do so, subject to the following conditions:
	The above copyright notice and this permission notice shall be included in all
	copies or substantial portions of the Software.
	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.

 9 [MIT License]                  zsfx by ... me                                      https://github.com/fcorbelli/zsfx
	MIT License
	Copyright (c) 2022 Franco Corbelli
	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights
	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	copies of the Software, and to permit persons to whom the Software is
	furnished to do so, subject to the following conditions:
	The above copyright notice and this permission notice shall be included in all
	copies or substantial portions of the Software.
	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.

10 [zlib license]                 Crc32.h Copyright (c) 2011-2019 Stephan Brumme      https://create.stephan-brumme.com/crc32/
 /// LICENSE_START.10
 /// LICENSE_END.10
	This code is licensed under the zlib License:
	This software is provided 'as-is', without any express or implied
	warranty. In no event will the authors be held liable for any damages
	arising from the use of this software.
	Permission is granted to anyone to use this software for any purpose,
	including commercial applications, and to alter it and redistribute it
	freely, subject to the following restrictions:
	1. The origin of this software must not be misrepresented; you must not
	   claim that you wrote the original software. If you use this software
	   in a product, an acknowledgment in the product documentation would be
	   appreciated but is not required.
	2. Altered source versions must be plainly marked as such, and must not be
	   misrepresented as being the original software.
	3. This notice may not be removed or altered from any source distribution.

11 [zlib license]                 part of hash-library (MD5, SHA-3) by Stephan Brumme https://github.com/stbrumme/hash-library
 /// LICENSE_START.11
 /// LICENSE_END.11
	zlib License
	Copyright (c) 2014,2015 Stephan Brumme
	This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
	Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
	1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software.
	   If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
	2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
	3. This notice may not be removed or altered from any source distribution.

12 [zlib license]                 crc32c.c Copyright (C) Mark Adler              https://github.com/madler/brotli/blob/master/crc32c.c
 /// LICENSE_START.12
 /// LICENSE_END.12
	crc32c.c -- compute CRC-32C using the Intel crc32 instruction
	Copyright (C) 2013, 2015, 2021 Mark Adler
	This software is provided 'as-is', without any express or implied
	warranty.  In no event will the author be held liable for any damages
	arising from the use of this software.
	Permission is granted to anyone to use this software for any purpose,
	including commercial applications, and to alter it and redistribute it
	freely, subject to the following restrictions:
	1. The origin of this software must not be misrepresented; you must not
	 claim that you wrote the original software. If you use this software
	 in a product, an acknowledgment in the product documentation would be
	 appreciated but is not required.
	2. Altered source versions must be plainly marked as such, and must not be
	 misrepresented as being the original software.
	3. This notice may not be removed or altered from any source distribution.
	Mark Adler
	madler@alumni.caltech.edu

13 [The Unlicense]                wyhash (experimental) WangYi                        https://github.com/wangyi-fudan/wyhash
 /// LICENSE_START.13
 /// LICENSE_END.13
	This is free and unencumbered software released into the public domain.
	Anyone is free to copy, modify, publish, use, compile, sell, or
	distribute this software, either in source code form or as a compiled
	binary, for any purpose, commercial or non-commercial, and by any
	means.
	In jurisdictions that recognize copyright laws, the author or authors
	of this software dedicate any and all copyright interest in the
	software to the public domain. We make this dedication for the benefit
	of the public at large and to the detriment of our heirs and
	successors. We intend this dedication to be an overt act of
	relinquishment in perpetuity of all present and future rights to this
	software under copyright law.
	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
	IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
	OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
	ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
	OTHER DEALINGS IN THE SOFTWARE.
	For more information, please refer to <http://unlicense.org/>

14 [BSD 2-Clause license]         xxHash Copyright (C) 2012-2020 Yann Collet          https://github.com/memcached/memcached/blob/master/xxhash.h
 /// LICENSE_START.14
 /// LICENSE_END.14
	xxHash - Extremely Fast Hash algorithm
	Header File
	Copyright (C) 2012-2020 Yann Collet
	BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
	Redistribution and use in source and binary forms, with or without
	modification, are permitted provided that the following conditions are
	met:
	   * Redistributions of source code must retain the above copyright
		 notice, this list of conditions and the following disclaimer.
	   * Redistributions in binary form must reproduce the above
		 copyright notice, this list of conditions and the following disclaimer
		 in the documentation and/or other materials provided with the
		 distribution.
	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	You can contact the author at:
	  - xxHash homepage: https://www.xxhash.com
	  - xxHash source repository: https://github.com/Cyan4973/xxHash

15 [CC0 1.0 / Apache License 2.0] BLAKE3 hasher                                       https://github.com/BLAKE3-team/BLAKE3
 /// LICENSE_START.15
 /// LICENSE_END.15
	This work is released into the public domain with CC0 1.0. Alternatively, it is
	licensed under the Apache License 2.0.
	-------------------------------------------------------------------------------
	Creative Commons Legal Code
	CC0 1.0 Universal
		CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
		LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
		ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
		INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
		REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
		PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
		THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
		HEREUNDER.
	Statement of Purpose
	The laws of most jurisdictions throughout the world automatically confer
	exclusive Copyright and Related Rights (defined below) upon the creator
	and subsequent owner(s) (each and all, an "owner") of an original work of
	authorship and/or a database (each, a "Work").
	Certain owners wish to permanently relinquish those rights to a Work for
	the purpose of contributing to a commons of creative, cultural and
	scientific works ("Commons") that the public can reliably and without fear
	of later claims of infringement build upon, modify, incorporate in other
	works, reuse and redistribute as freely as possible in any form whatsoever
	and for any purposes, including without limitation commercial purposes.
	These owners may contribute to the Commons to promote the ideal of a free
	culture and the further production of creative, cultural and scientific
	works, or to gain reputation or greater distribution for their Work in
	part through the use and efforts of others.
	For these and/or other purposes and motivations, and without any
	expectation of additional consideration or compensation, the person
	associating CC0 with a Work (the "Affirmer"), to the extent that he or she
	is an owner of Copyright and Related Rights in the Work, voluntarily
	elects to apply CC0 to the Work and publicly distribute the Work under its
	terms, with knowledge of his or her Copyright and Related Rights in the
	Work and the meaning and intended legal effect of CC0 on those rights.
	1. Copyright and Related Rights. A Work made available under CC0 may be
	protected by copyright and related or neighboring rights ("Copyright and
	Related Rights"). Copyright and Related Rights include, but are not
	limited to, the following:
	  i. the right to reproduce, adapt, distribute, perform, display,
		 communicate, and translate a Work;
	 ii. moral rights retained by the original author(s) and/or performer(s);
	iii. publicity and privacy rights pertaining to a person's image or
		 likeness depicted in a Work;
	 iv. rights protecting against unfair competition in regards to a Work,
		 subject to the limitations in paragraph 4(a), below;
	  v. rights protecting the extraction, dissemination, use and reuse of data
		 in a Work;
	 vi. database rights (such as those arising under Directive 96/9/EC of the
		 European Parliament and of the Council of 11 March 1996 on the legal
		 protection of databases, and under any national implementation
		 thereof, including any amended or successor version of such
		 directive); and
	vii. other similar, equivalent or corresponding rights throughout the
		 world based on applicable law or treaty, and any national
		 implementations thereof.
	2. Waiver. To the greatest extent permitted by, but not in contravention
	of, applicable law, Affirmer hereby overtly, fully, permanently,
	irrevocably and unconditionally waives, abandons, and surrenders all of
	Affirmer's Copyright and Related Rights and associated claims and causes
	of action, whether now known or unknown (including existing as well as
	future claims and causes of action), in the Work (i) in all territories
	worldwide, (ii) for the maximum duration provided by applicable law or
	treaty (including future time extensions), (iii) in any current or future
	medium and for any number of copies, and (iv) for any purpose whatsoever,
	including without limitation commercial, advertising or promotional
	purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
	member of the public at large and to the detriment of Affirmer's heirs and
	successors, fully intending that such Waiver shall not be subject to
	revocation, rescission, cancellation, termination, or any other legal or
	equitable action to disrupt the quiet enjoyment of the Work by the public
	as contemplated by Affirmer's express Statement of Purpose.
	3. Public License Fallback. Should any part of the Waiver for any reason
	be judged legally invalid or ineffective under applicable law, then the
	Waiver shall be preserved to the maximum extent permitted taking into
	account Affirmer's express Statement of Purpose. In addition, to the
	extent the Waiver is so judged Affirmer hereby grants to each affected
	person a royalty-free, non transferable, non sublicensable, non exclusive,
	irrevocable and unconditional license to exercise Affirmer's Copyright and
	Related Rights in the Work (i) in all territories worldwide, (ii) for the
	maximum duration provided by applicable law or treaty (including future
	time extensions), (iii) in any current or future medium and for any number
	of copies, and (iv) for any purpose whatsoever, including without
	limitation commercial, advertising or promotional purposes (the
	"License"). The License shall be deemed effective as of the date CC0 was
	applied by Affirmer to the Work. Should any part of the License for any
	reason be judged legally invalid or ineffective under applicable law, such
	partial invalidity or ineffectiveness shall not invalidate the remainder
	of the License, and in such case Affirmer hereby affirms that he or she
	will not (i) exercise any of his or her remaining Copyright and Related
	Rights in the Work or (ii) assert any associated claims and causes of
	action with respect to the Work, in either case contrary to Affirmer's
	express Statement of Purpose.
	4. Limitations and Disclaimers.
	 a. No trademark or patent rights held by Affirmer are waived, abandoned,
		surrendered, licensed or otherwise affected by this document.
	 b. Affirmer offers the Work as-is and makes no representations or
		warranties of any kind concerning the Work, express, implied,
		statutory or otherwise, including without limitation warranties of
		title, merchantability, fitness for a particular purpose, non
		infringement, or the absence of latent or other defects, accuracy, or
		the present or absence of errors, whether or not discoverable, all to
		the greatest extent permissible under applicable law.
	 c. Affirmer disclaims responsibility for clearing rights of other persons
		that may apply to the Work or any use thereof, including without
		limitation any person's Copyright and Related Rights in the Work.
		Further, Affirmer disclaims responsibility for obtaining any necessary
		consents, permissions or other rights required for any use of the
		Work.
	 d. Affirmer understands and acknowledges that Creative Commons is not a
		party to this document and has no duty or obligation with respect to
		this CC0 or use of the Work.
	-------------------------------------------------------------------------------
									 Apache License
							   Version 2.0, January 2004
							http://www.apache.org/licenses/
	   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
	   1. Definitions.
		  "License" shall mean the terms and conditions for use, reproduction,
		  and distribution as defined by Sections 1 through 9 of this document.
		  "Licensor" shall mean the copyright owner or entity authorized by
		  the copyright owner that is granting the License.
		  "Legal Entity" shall mean the union of the acting entity and all
		  other entities that control, are controlled by, or are under common
		  control with that entity. For the purposes of this definition,
		  "control" means (i) the power, direct or indirect, to cause the
		  direction or management of such entity, whether by contract or
		  otherwise, or (ii) ownership of fifty percent (50%) or more of the
		  outstanding shares, or (iii) beneficial ownership of such entity.
		  "You" (or "Your") shall mean an individual or Legal Entity
		  exercising permissions granted by this License.
		  "Source" form shall mean the preferred form for making modifications,
		  including but not limited to software source code, documentation
		  source, and configuration files.
		  "Object" form shall mean any form resulting from mechanical
		  transformation or translation of a Source form, including but
		  not limited to compiled object code, generated documentation,
		  and conversions to other media types.
		  "Work" shall mean the work of authorship, whether in Source or
		  Object form, made available under the License, as indicated by a
		  copyright notice that is included in or attached to the work
		  (an example is provided in the Appendix below).
		  "Derivative Works" shall mean any work, whether in Source or Object
		  form, that is based on (or derived from) the Work and for which the
		  editorial revisions, annotations, elaborations, or other modifications
		  represent, as a whole, an original work of authorship. For the purposes
		  of this License, Derivative Works shall not include works that remain
		  separable from, or merely link (or bind by name) to the interfaces of,
		  the Work and Derivative Works thereof.
		  "Contribution" shall mean any work of authorship, including
		  the original version of the Work and any modifications or additions
		  to that Work or Derivative Works thereof, that is intentionally
		  submitted to Licensor for inclusion in the Work by the copyright owner
		  or by an individual or Legal Entity authorized to submit on behalf of
		  the copyright owner. For the purposes of this definition, "submitted"
		  means any form of electronic, verbal, or written communication sent
		  to the Licensor or its representatives, including but not limited to
		  communication on electronic mailing lists, source code control systems,
		  and issue tracking systems that are managed by, or on behalf of, the
		  Licensor for the purpose of discussing and improving the Work, but
		  excluding communication that is conspicuously marked or otherwise
		  designated in writing by the copyright owner as "Not a Contribution."
		  "Contributor" shall mean Licensor and any individual or Legal Entity
		  on behalf of whom a Contribution has been received by Licensor and
		  subsequently incorporated within the Work.
	   2. Grant of Copyright License. Subject to the terms and conditions of
		  this License, each Contributor hereby grants to You a perpetual,
		  worldwide, non-exclusive, no-charge, royalty-free, irrevocable
		  copyright license to reproduce, prepare Derivative Works of,
		  publicly display, publicly perform, sublicense, and distribute the
		  Work and such Derivative Works in Source or Object form.
	   3. Grant of Patent License. Subject to the terms and conditions of
		  this License, each Contributor hereby grants to You a perpetual,
		  worldwide, non-exclusive, no-charge, royalty-free, irrevocable
		  (except as stated in this section) patent license to make, have made,
		  use, offer to sell, sell, import, and otherwise transfer the Work,
		  where such license applies only to those patent claims licensable
		  by such Contributor that are necessarily infringed by their
		  Contribution(s) alone or by combination of their Contribution(s)
		  with the Work to which such Contribution(s) was submitted. If You
		  institute patent litigation against any entity (including a
		  cross-claim or counterclaim in a lawsuit) alleging that the Work
		  or a Contribution incorporated within the Work constitutes direct
		  or contributory patent infringement, then any patent licenses
		  granted to You under this License for that Work shall terminate
		  as of the date such litigation is filed.
	   4. Redistribution. You may reproduce and distribute copies of the
		  Work or Derivative Works thereof in any medium, with or without
		  modifications, and in Source or Object form, provided that You
		  meet the following conditions:
		  (a) You must give any other recipients of the Work or
			  Derivative Works a copy of this License; and
		  (b) You must cause any modified files to carry prominent notices
			  stating that You changed the files; and
		  (c) You must retain, in the Source form of any Derivative Works
			  that You distribute, all copyright, patent, trademark, and
			  attribution notices from the Source form of the Work,
			  excluding those notices that do not pertain to any part of
			  the Derivative Works; and
		  (d) If the Work includes a "NOTICE" text file as part of its
			  distribution, then any Derivative Works that You distribute must
			  include a readable copy of the attribution notices contained
			  within such NOTICE file, excluding those notices that do not
			  pertain to any part of the Derivative Works, in at least one
			  of the following places: within a NOTICE text file distributed
			  as part of the Derivative Works; within the Source form or
			  documentation, if provided along with the Derivative Works; or,
			  within a display generated by the Derivative Works, if and
			  wherever such third-party notices normally appear. The contents
			  of the NOTICE file are for informational purposes only and
			  do not modify the License. You may add Your own attribution
			  notices within Derivative Works that You distribute, alongside
			  or as an addendum to the NOTICE text from the Work, provided
			  that such additional attribution notices cannot be construed
			  as modifying the License.
		  You may add Your own copyright statement to Your modifications and
		  may provide additional or different license terms and conditions
		  for use, reproduction, or distribution of Your modifications, or
		  for any such Derivative Works as a whole, provided Your use,
		  reproduction, and distribution of the Work otherwise complies with
		  the conditions stated in this License.
	   5. Submission of Contributions. Unless You explicitly state otherwise,
		  any Contribution intentionally submitted for inclusion in the Work
		  by You to the Licensor shall be under the terms and conditions of
		  this License, without any additional terms or conditions.
		  Notwithstanding the above, nothing herein shall supersede or modify
		  the terms of any separate license agreement you may have executed
		  with Licensor regarding such Contributions.
	   6. Trademarks. This License does not grant permission to use the trade
		  names, trademarks, service marks, or product names of the Licensor,
		  except as required for reasonable and customary use in describing the
		  origin of the Work and reproducing the content of the NOTICE file.
	   7. Disclaimer of Warranty. Unless required by applicable law or
		  agreed to in writing, Licensor provides the Work (and each
		  Contributor provides its Contributions) on an "AS IS" BASIS,
		  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
		  implied, including, without limitation, any warranties or conditions
		  of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
		  PARTICULAR PURPOSE. You are solely responsible for determining the
		  appropriateness of using or redistributing the Work and assume any
		  risks associated with Your exercise of permissions under this License.
	   8. Limitation of Liability. In no event and under no legal theory,
		  whether in tort (including negligence), contract, or otherwise,
		  unless required by applicable law (such as deliberate and grossly
		  negligent acts) or agreed to in writing, shall any Contributor be
		  liable to You for damages, including any direct, indirect, special,
		  incidental, or consequential damages of any character arising as a
		  result of this License or out of the use or inability to use the
		  Work (including but not limited to damages for loss of goodwill,
		  work stoppage, computer failure or malfunction, or any and all
		  other commercial damages or losses), even if such Contributor
		  has been advised of the possibility of such damages.
	   9. Accepting Warranty or Additional Liability. While redistributing
		  the Work or Derivative Works thereof, You may choose to offer,
		  and charge a fee for, acceptance of support, warranty, indemnity,
		  or other liability obligations and/or rights consistent with this
		  License. However, in accepting such obligations, You may act only
		  on Your own behalf and on Your sole responsibility, not on behalf
		  of any other Contributor, and only if You agree to indemnify,
		  defend, and hold each Contributor harmless for any liability
		  incurred by, or claims asserted against, such Contributor by reason
		  of your accepting any such warranty or additional liability.
	   END OF TERMS AND CONDITIONS
	   APPENDIX: How to apply the Apache License to your work.
		  To apply the Apache License to your work, attach the following
		  boilerplate notice, with the fields enclosed by brackets "[]"
		  replaced with your own identifying information. (Don't include
		  the brackets!)  The text should be enclosed in the appropriate
		  comment syntax for the file format. We also recommend that a
		  file or class name and description of purpose be included on the
		  same "printed page" as the copyright notice for easier
		  identification within third-party archives.
	   Copyright 2019 Jack O'Connor and Samuel Neves
	   Licensed under the Apache License, Version 2.0 (the "License");
	   you may not use this file except in compliance with the License.
	   You may obtain a copy of the License at
		   http://www.apache.org/licenses/LICENSE-2.0
	   Unless required by applicable law or agreed to in writing, software
	   distributed under the License is distributed on an "AS IS" BASIS,
	   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	   See the License for the specific language governing permissions and
	   limitations under the License.

16 [Public domain]  Whirlpool by Paulo Barreto and Vincent Rijmen       https://web.archive.org/web/20171129084214/http://www.larc.usp.br/~pbarreto/WhirlpoolPage.html
 /// LICENSE_START.16
 /// LICENSE_END.16
	The reference implementations are in the public domain.
	But before you go and use it, please read the accompanying disclaimer:
	THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
	OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
	BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
	WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
	OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
	EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

17 [almost-unrestricted]		  Twofish implementation,(c) 2002 by Niels Ferguson   https://github.com/wernerd/ZRTPCPP/blob/master/cryptcommon/twofish.c
	**** This is part of a not-yet-released zpaqfranz feature (future zpaq-over-IP)
	* Fast, portable, and easy-to-use Twofish implementation,
	* Version 0.3.
	* Copyright (c) 2002 by Niels Ferguson.

	The author hereby grants a perpetual license to everybody to
	use this code for any purpose as long as the copyright message is included
	in the source code of this or any derived work.

	Yes, this means that you, your company, your club, and anyone else
	can use this code anywhere you want. You can change it and distribute it
	under the GPL, include it in your commercial product without releasing
	the source code, put it on the web, etc.
	The only thing you cannot do is remove my copyright message,
	or distribute any source code based on this implementation that does not
	include my copyright message.

	I appreciate a mention in the documentation or credits,
	but I understand if that is difficult to do.
	I also appreciate it if you tell me where and why you used my code.

18 [Apache License 2.0] HighWay64 hasher                                       https://github.com/google/highwayhash
 /// LICENSE_START.18
 /// LICENSE_END.18
      Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
   1. Definitions.
      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.
      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.
      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.
      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.
      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.
      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.
      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).
      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.
      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."
      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.
   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.
   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.
   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:
      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and
      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and
      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and
      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.
      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.
   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.
   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.
   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.
   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.
   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

19 [Public domain]                The files in this directory are released to the Public Domain.
    *** NOTE: PDC is used only on Windows ***
 /// LICENSE_START.19
 /// LICENSE_END.19
    github     	                  https://github.com/Bill-Gray/PDCursesMod

	In particular				  https://github.com/Bill-Gray/PDCursesMod/tree/master/wincon
								  Windows console port was originally provided by Chris Szurgot szurgot@itribe.net
	Legal Stuff
	The core package is in the public domain, but small portions of PDCursesMod are subject
	to copyright under various licenses.
	Each directory contains a README file,
	with a section titled "Distribution Status" which describes the status
	of the files in that directory.

20 [Public domain]                CPU accelerated SHA code taken from SHA-Intrinsics
 /// LICENSE_START.20
 /// LICENSE_END.20
    github                        https://github.com/noloader/SHA-Intrinsics
	
    Copyright © 2022 Jeffrey Walton <noloader@gmail.com>
    sha1-x86.c - Intel SHA extensions using C intrinsics
    Written and place in public domain by Jeffrey Walton
    Based on code from Intel, and by Sean Gulley for
    the miTLS project.

21 [Public domain]                zpaqd v7.15 - ZPAQ compression development tool - Aug. 17, 2016.
 /// LICENSE_START.21
 /// LICENSE_END.21
	This software is provided as-is, with no warranty.
	I, Matt Mahoney, release this software into
	the public domain.   This applies worldwide.
	In some countries this may not be legally possible; if so:
	I grant anyone the right to use this software for any purpose,
	without any conditions, unless such conditions are required by law.

22 [BSD 2-Clause license]                LZ4 Copyright (C) 2011-2023, Yann Collet https://github.com/lz4/lz4
 /// LICENSE_START.22
 /// LICENSE_END.22

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 [MIT License]                https://github.com/codewithnick/ascii-art
 /// LICENSE_START.23
 /// LICENSE_END.23

MIT License

Copyright (c) 2024 codewithnick

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

1. The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

2. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.

   _____ _____  ______ ______ _______ _____ _   _  _____  _____ 
  / ____|  __ \|  ____|  ____|__   __|_   _| \ | |/ ____|/ ____|
 | |  __| |__) | |__  | |__     | |    | | |  \| | |  __| (___  
 | | |_ |  _  /|  __| |  __|    | |    | | | . ` | | |_ |\___ \ 
 | |__| | | \ \| |____| |____   | |   _| |_| |\  | |__| |____) |
  \_____|_|  \_\______|______|  |_|  |_____|_| \_|\_____|_____/ 
                                                                


 0 ****** It is worth mentioning that the initial developer, 
   ****** the one who has the most credit is
   ****** Dr. Matt Mahoney             http://mattmahoney.net
   ****** If you like zpaq technology, HE is the one to thank
 
 1 Thanks to JFLarvoire 								for usefun (yes, usefun) information https://github.com/JFLarvoire/SysToolsLib/blob/master/C/MsvcLibX/src/readlink.c
 2 Thanks to Bulat Ziganshin 							for contribution on Slicing-by-16 for crc32
 3 Thanks to SeDD user of the encode.ru forum   		for SFX debugging
 4 Thanks to Aki  user of forums.debian.net     		for some Debian's packager help
 5 Thanks to https://github.com/dertuxmalwieder 		for testing on various Unixes and OpenBSD port
 6 Thanks to Felix Palmen <zirias@freebsd.org>  		for great help on FreeBSD "packaging"
 7 Thanks to https://github.com/omar-polo       		for a merged-unmerged-hardcoded NOJIT fix
 8 Thanks to https://github.com/Piqlet          		for non-x86 help
 9 Thanks to https://github.com/osmano807       		for non-x86 help
10 Thanks to Stephen Kitt <skitt@debian.org>    		for supporting Debian "packaging"
11 Thanks to Niels Ferguson                     		for the Twofish implementation
12 Thanks to Newcastle University						for some winsock related issues, Master Degree, Game Engineering
13 Thanks to https://github.com/akumiszcza      		for OneDrive issue
14 Thanks to https://github.com/ratay           		for help help fix, longpath
15 Thanks to https://github.com/graphixillusion 		for "lost" -vss
16 Thanks to https://discuss.haiku-os.org/u/PulkoMandy 	for Haiku help
17 Thanks to https://github.com/Bill-Gray/				for PDCursesMod
18 Thanks to https://github.com/justinormont            for the proposed Homebrew install formula for macOS and x64 Linux
19 Thanks to https://github.com/alebcay                 for coding the Homebrew install formula for macOS and x64 Linux
20 Thanks to https://github.com/ZhongRuoyu				for __linux__ instead of older #defines
21 Thanks to Coody user of encode.su					for nonexistent folder bug
22 Thanks to https://github.com/ruptotus				for "hidden" overloaded fwrite() function bug, and -dryrun on robocopy fix
23 Thanks to Karl Wagner								for typo fixing and various suggestions
24 Thanks to https://github.com/Erol-2022               for Windows 7 console-bug fixing
25 Thanks to Martin Pluskal                             for OpenSUSE package
26 Thanks to Petr Pisar                                 for Fedora Package
27 Thanks to Davide Moretti                             for -home
28 Thanks to https://github.com/DetourNetworkUK         for Mac PowerPC strnlen bug
29 Thanks to Lone_Wolf (bbs.archlinux.org)              for reviewing PKGBUILD on arch
30 Thanks to Scimmia   (bbs.archlinux.org)              for reviewing PKGBUILD on arch
31 Thanks to Loqs      (bbs.archlinux.org)              for reviewing PKGBUILD on arch
32 Thanks to https://github.com/tansy                   for Slackware older compilers
33 Thanks to https://github.com/janko-js                for idea on quick collision-detector
34 Thanks to https://github.com/havocesp                for very useful ideas
35 Thanks to https://github.com/luckman212              for a refactoring-induced bug detection
36 Thanks to whiskytechfred user of the encode.su forum for truncate-touching
37 Thanks to Takayuki Matsuoka                          for LZ4 streaming API example : line-by-line logfile 
38 Thanks to whiskytechfred user of the encode.ru forum for vss filename fix
39 Thanks to https://github.com/sergeevabc              for suggestions on hash command
40 Thanks to https://github.com/gitboogey               for ideas on -test and -verify with vss
41 Thanks to https://github.com/bastiple                for -D_FORTIFY_SOURCE=3
42 Thanks to https://github.com/sheckandar              for Synology 7.1 issue
43 Thanks to https://github.com/adamantida              for improved similarity with zpaq for archives with only deletions
44 Thanks to https://github.com/kskarlatos              for giving me an idea to improve stdin support
45 Thanks to https://github.com/codewithnick            for changing his license to a Fedora-friendly one
46 Thanks to https://github.com/mirogeorg               for various suggestions
47 Thanks to https://github.com/brad0                   for OpenBSD fix
48 Thanks to Carlo, Debian user                         for debugging support
49 Thanks to https://github.com/KnightAR                for -stdin bug from 60.7 to 60.8

                _____ _   _  _____ _______       _      _
               |_   _| \ | |/ ____|__   __|/\   | |    | |
                 | | |  \| | (___    | |  /  \  | |    | |
                 | | | . ` |\___ \   | | / /\ \ | |    | |
                _| |_| |\  |____) |  | |/ ____ \| |____| |____
               |_____|_| \_|_____/   |_/_/    \_\______|______|


===============================================================================
[1] Fastest: 	binary packages (sometimes not the most up-to-date)
OpenBSD: 		pkg_add zpaqfranz
FreeBSD: 		pkg install zpaqfranz
MacOS: 			brew install zpaqfranz  (same for Ubuntu 20 x64)
OpenSUSE:       sudo zypper install zpaqfranz
===============================================================================


===============================================================================
[2] Very fast: 	binary packages for various platforms (almost the latest)
Sourceforge:	https://sourceforge.net/projects/zpaqfranz/files/
===============================================================================


===============================================================================
[3] Quick and dirty: download Makefile and source code from github
(pre-requisite: working C++ compiler like g++ or clang, make, wget)

```
wget https://github.com/fcorbelli/zpaqfranz/raw/main/zpaqfranz.cpp
wget https://github.com/fcorbelli/zpaqfranz/raw/main/NONWINDOWS/Makefile
make install clean
```

WARNING: the Makefile is almost "universal",
BUT
beware of the install path: sometimes you need to change /usr/local/bin to /usr/bin
in the BINDIR= line (it is the install prefix)

```
wget http://www.francocorbelli.it/Makefile -O Makefile
```

Why cc for a C++ file? Long story short: ancient backward compatibility

CC?=            cc
INSTALL?=       install
RM?=            rm
PROG=           zpaqfranz
CFLAGS+=        -O3 -Dunix
LDADD=          -pthread -lstdc++ -lm
BINDIR=         /usr/local/bin
BSD_INSTALL_PROGRAM?=   install -m 0555

all:    build

build:  ${PROG}

install:        ${PROG}
	${BSD_INSTALL_PROGRAM} ${PROG} ${DESTDIR}${BINDIR}

${PROG}:        ${OBJECTS}
	${CC}  ${CFLAGS} zpaqfranz.cpp -o ${PROG} ${LDADD}
clean:
	${RM} -f ${PROG}

===============================================================================


===============================================================================
[4] Dirtiest (!), the "nightly build"
(pre-requisite: working C++ compiler like g++ or clang, wget, NO MAKE NEEDED)

*** WARNING This version is practically the n-1,
*** may contain experimental code, compilation incompatibilities etc.
*** Almost the bleeding edge
```
wget http://www.francocorbelli.it/zpaqfranz.cpp -O zpaqfranz.cpp
```

then... build (aka: compile)


  _    _  ______          __  _______ ____    ____  _    _ _____ _      _____
 | |  | |/ __ \ \        / / |__   __/ __ \  |  _ \| |  | |_   _| |    |  __ \
 | |__| | |  | \ \  /\  / /     | | | |  | | | |_) | |  | | | | | |    | |  | |
 |  __  | |  | |\ \/  \/ /      | | | |  | | |  _ <| |  | | | | | |    | |  | |
 | |  | | |__| | \  /\  /       | | | |__| | | |_) | |__| |_| |_| |____| |__| |
 |_|  |_|\____/   \/  \/        |_|  \____/  |____/ \____/|_____|______|_____/



My main development platforms are AMD Windows
(non-Intel Windows (arm) currently unsupported) 
and Intel FreeBSD.

I rarely use Linux or MacOS or whatever (for compiling),
so fixing may be needed.

As explained the program is single file, be careful to link the pthread library.
You need it for ESXi too, even if it doesn't work. Don't be afraid, zpaqfranz knows!

Library dependencies are minimal:   libc,libc++,libcxxrt,libm,libgcc_s,libthr

DEFINEs at compile-time: IT IS UP TO YOU NOT TO MIX LOGICALLY INCOMPATIBLE DEFINITIONS!

(nothing)							// Compile for INTEL Windows
-DHWBLAKE3 blake3_windows_gnu.S		// On Win64 enable HW accelerated BLAKE3 (with assembly)
-DHWSHA1							// On Win64 enable HW SHA1 (-flaghw)
-DHWSHA2							// Enable HW SHA2 (without assembly code to be linked)
-Dunix 								// Compile on "something different from Windows"
-DSOLARIS        					// Solaris is similar, but not equal, to BSD Unix

-DNOJIT								// By default zpaqfranz works on Intel CPUs
									// (for simplicity I'll call them Intel, meaning x86-SSE2 and amd64)
									// On non-Intel a -DNOJIT build should run fine on LITTLE ENDIANs
									// like Linux aarch64, Android aarch64 etc
									// On BIG ENDIAN or "strange things" like middle endian
									// (Honeywell 316) or little word (PDP-11)
									// the autotest command is for you :)
									// https://gcc.gnu.org/legacy-ml/gcc-help/2007-07/msg00343.html
									
									// From 60.9s if compiled without -DNOJIT there is a switch -nojit
									// Translation: even if you have an executable compiled with default
									// you can turn off the JIT

-DANCIENT							// Turn off some functions for compiling in very old systems
									// consuming less RAM (ex. PowerPC Mac, Synology 7.1), no auto C++

-DNAS								// Like ANCIENT, but a bit less stringent (ex. Synology 7.2)

-DBIG								// Turn on BIG ENDIAN at compile time

-DDEBUG								// Old 7.15, almost useless (turn on asserts). Use -debug(s) switches instead

-DESX								// Yes, zpaqfranz runs (kind of) on ESXi too :-)

-DALIGNMALLOC 						// Force malloc to be aligned at something (sparc64). Use naive CRC-32

-DSERVER							// Enable the cloudpaq client (for Windows)

-DIPV6								// Do not force IPv4 (the current default)

-DGUI								// Enable the gui (ncurses on Windows)

HIDDEN GEMS
If the (non Windows) executable is named "dir" it acts (just about)... like Windows' dir
Beware of collisions with other software "dir"

If the (non Windows) executable is named "robocopy" runs... some kind of robocopy-like.
ex robocopy /tmp/zp /tmp/backup1 /tmp/backup2
BEWARE: those are WET RUNS (-kill automagically enabled), with -space enabled!


WARNINGS
Some strange warnings with some compilers (too old, or too new), not MY fault

My very own reporting
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101558

Original bug
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96963


STRANGE THINGS (FAQ)

NOTE1: switches start with a single dash (-), not a double dash (--)

NOTE2: switches ARE case sensitive.   -maxsize <> -MAXSIZE

THE JIT (just-in-time)
zpaqfranz translates (by default) ZPAQL opcodes into "real" Intel (amd64 or x86+SSE2) machine code.
On other systems a -DNOJIT (ARM/Apple CPUs for example) will enforce software interpretation.
I write it BIG, #1 FAQ with newer Macintosh (M1/M2) is forgetting -DNOJIT

  _   _  ___  _   _   ___ _   _ _____ _____ _           __          ____  _   _  ___      _ ___ _____ 
 | \ | |/ _ \| \ | | |_ _| \ | |_   _| ____| |      ____\ \        |  _ \| \ | |/ _ \    | |_ _|_   _|
 |  \| | | | |  \| |  | ||  \| | | | |  _| | |     |_____\ \  _____| | | |  \| | | | |_  | || |  | |  
 | |\  | |_| | |\  |  | || |\  | | | | |___| |___  |_____/ / |_____| |_| | |\  | |_| | |_| || |  | |  
 |_| \_|\___/|_| \_| |___|_| \_| |_| |_____|_____|      /_/        |____/|_| \_|\___/ \___/|___| |_|  
                                                                                                      
Starting from version 60.9, the same zpaqfranz code is used even if compiled **without** `-DNOJIT`. 
In this case, you can disable the JIT at run time with the `-nojit` switch. 
Remember that JIT availability does not affect or slow down **compression** but has 
a significant impact on **decompression**.

To summarize: If you’re sure your system does not support JIT (for example Apple silicon), 
compile with `-DNOJIT` as before.
Otherwise, compile: zpaqfranz will "automagically" turn on -nojit if CPU (or OS) is not OK. 
If, during extraction, the process fails because the JIT is kaputt,
add the `-nojit` switch (e.g., zpaqfranz x z:\pippo.zpaq -to z:\ugo -nojit).

Why this change? Because there are Intel platforms (which do support JIT) 
where executing code from allocated memory is not allowed for security reasons. 
Classic examples include certain BSD types (OpenBSD, NetBSD...). 
In such cases, even if the CPU is compatible, the operating system is not.
Not my fault :)


SHA-1 HARDWARE ACCELERATION
Some CPUs do have SHA instructions (typically AMD, not very widespread on Intel).
So you can use a piece of 7-zip by Igor Pavlov (I am sure you know 7z) that is
not really useful, but just for fun (faster BUT with higher latency).
For performance reasons there are no run-time CPU compatibility checks: it must be turned on
via the optional -hw switch
On AMD 5950X runs ~1.86 GB/s vs ~951 MB/s
The obj can be assembled from the fixed source code with asmc64
https://github.com/nidud/asmc
asmc64.exe sha1ugo.asm
Then link the .obj and compile with -DHWSHA1
Short version:  not worth the effort for the GA release
From build 58+ there is a new -DHWSHA2, without linking of asm, that accelerates SHA256 too.

STATIC LINKING
I like -static very much, there are a thousand arguments as to whether it is good or not.
There are strengths and weaknesses.
Normally I prefer it, you do as you prefer.

TO BE NATIVE OR NOT TO BE?
The -march=native  is a switch that asks the compiler to activate all possible
optimizations for the CPU on which zpaqfranz is being compiled.
This is to obtain the maximum possible performance,
while binding the executable to the processor.
It should not be used if you intend, for some reason,
to transfer the object program to a different system.
If you are compiling from source you can safely use it.
BTW on my PC native is faster in benchmark, but slower in real-world compression (!).

CLANG OR GCC?
It is hard to choose between these two compilers. 
I generally prefer gcc for better performance. 
However, this is not true all the time; it depends on a thousand things, including the CPU type. 
For example on Arch and AMD 7950, running inside VM,
clang 17.0.6 (4992) is much faster than gcc 14.1.1 (4584).
BUT with -march=native
gcc 14.1.1 (5595) is way faster than clang 17.0.6 (5044)

Short version: test yourself. 
The b (benchmark) command is there for you.

-O2 or -O3
Who knows. Try yourself :)

OTHER COMPILERS
I do not know. Try yourself :)

DEBIAN (and derivatives)
Debian does not "like" anything embedded https://wiki.debian.org/EmbeddedCopies
zpaqfranz (on Windows) has two different SFX modules (32 and 64).
It is possible to make a Debian-package-compliant source code
with some sed (or a single sed -e) (of course remove the |)
sed -i "/DEBIAN|START/,/\/\/\/DEBIA|NEND/d"  zpaqfranz.cpp
sed -i "s/\/\/\/char ext|ract_test1/char ext|ract_test1/g" zpaqfranz.cpp
Actually, the code is inside #ifdef _WIN32, so it will be skipped on Debian 
and in general every non-Windows.

*NIX AND DOUBLE QUOTES
Please, on non-Windows systems, DO NOT FORGET THE DOUBLE QUOTES, 
especially with multipart files.
"test_????.zpaq" is good
test_????.zpaq   is BAD


NOTE: from 59_3 you need to link urlmon (with a -lurlmon) on Windows

TARGET EXAMPLES
```
Windows 64 (g++ 7.3.0)
g++ -O3  zpaqfranz.cpp -o zpaqfranz -lurlmon

Windows 64 (g++ 10.3.0) MSYS2
g++ -O3  zpaqfranz.cpp -o zpaqfranz -pthread -static -lurlmon

Windows 64 (g++, Hardware Blake3 implementation)
In this case, of course, linking the .S file is mandatory
g++ -O3 -DHWBLAKE3 blake3_windows_gnu.S zpaqfranz.cpp -o zpaqfranz -pthread -static -lurlmon

Windows 64 (g++, Hardware Blake3 implementation PLUS HW SHA1)
g++ -O3 -DHWBLAKE3 -DHWSHA1 blake3_windows_gnu.s zpaqfranz.cpp sha1ugo.obj -o zpaqfranzhw -pthread -static -lurlmon

Windows 64 (g++, Hardware Blake3 implementation PLUS HW SHA1/2)
g++ -O3 -DHWBLAKE3 -DHWSHA2 blake3_windows_gnu.s zpaqfranz.cpp -o zpaqfranzhw -pthread -static -lurlmon

Windows 64 (g++, Hardware Blake3 implementation PLUS HW SHA1/2 with GUI)
g++ -O3 -DGUI -DHWBLAKE3 -DHWSHA2 blake3_windows_gnu.s zpaqfranz.cpp -o zpaqfranzhw -pthread -static -s -lurlmon

Windows 32 (g++ 7.3.0 64 bit)
c:\mingw32\bin\g++ -m32 -O3 zpaqfranz.cpp -o zpaqfranz32 -pthread -static -lurlmon

Windows 64 (g++ 7.3.0), WITH cloud paq
g++ -O3 -DSERVER zpaqfranz.cpp -o zpaqfranz -lwsock32 -lws2_32 -lurlmon

FreeBSD (11.x) gcc 7
gcc7 -O3 -Dunix zpaqfranz.cpp -lstdc++ -pthread -o zpaqfranz -static -lm

FreeBSD (12.1) gcc 9.3.0
g++ -O3 -Dunix zpaqfranz.cpp  -pthread -o zpaqfranz -static-libstdc++ -static-libgcc

FreeBSD (11.4) gcc 10.2.0
g++ -O3 -Dunix zpaqfranz.cpp  -pthread -o zpaqfranz -static-libstdc++ -static-libgcc -Wno-stringop-overflow

FreeBSD (11.3) clang 6.0.0
clang++ -O3 -Dunix zpaqfranz.cpp  -pthread -o zpaqfranz -static

OpenBSD 6.6 clang++ 8.0.1
OpenBSD 7.1 clang++ 13.0.0
WARNING: with very old g++ compiler try -DANCIENT
****
Please note: you can get memory error, without -DNOJIT, on "strange" (non FreeBSD) machines
because mmap does not like PROT_EXEC. On newer zpaqfranz use -nojit switch
****
clang++ -Dunix -O3 zpaqfranz.cpp -o zpaqfranz -pthread -static

Arch Linux
I made a little AUR package (with some help), this one
https://aur.archlinux.org/packages/zpaqfranz-git
Should be good enough

For a fresh Manjaro 24.0.3 installation I suggest

sudo pacman -Sy
sudo pacman -S gcc (or sudo pacman -S clang)
sudo pacman -S fakeroot
sudo pacman -S yay
yay zpaqfranz

Answers to yay
Packages to install ==> 1
Packages to cleanBuild ==> A
Diffs to show? ==> N
Proceed with installation => y
(insert sudo password)

Debian Linux (10/11) gcc 8.3.0
ubuntu 21.04 desktop-amd64 gcc  10.3.0
manjaro 21.07 gcc 11.1.0
g++ -O3 -Dunix zpaqfranz.cpp  -pthread -o zpaqfranz -static

QNAP NAS TS-431P3 (Annapurna AL314) gcc 7.4.0
g++ -Dunix zpaqfranz.cpp  -pthread -o zpaqfranz -Wno-psabi

Fedora 34 gcc 11.2.1
Typically you will need some library (out of a fresh Fedora box)
sudo dnf install glibc-static libstdc++-static -y;
Then you can compile, via Makefile or "by hand"
(do not forget... sudo!)

CentOS
Please note:
"Red Hat discourages the use of static linking for security reasons.
Use static linking only when necessary, especially against libraries provided by Red Hat. "
Therefore a -static linking is often a nightmare on CentOS => change the Makefile
g++ -O3 -Dunix zpaqfranz.cpp  -pthread -o zpaqfranz

Solaris 11.4 gcc 7.3.0
OmniOS r151042 gcc 7.5.0
Beware: -DSOLARIS and some different linking options
g++ -O3 -DSOLARIS zpaqfranz.cpp -o zpaqfranz  -pthread -static-libgcc -lkstat

MacOS 11.0 gcc (clang) 12.0.5, INTEL
MacOS 12.6 gcc (clang) 13.1.6, INTEL
MacOS 12.7 gcc (clang) 14.0.0, INTEL
Please note:
No -static here
"Apple does not support statically linked binaries on Mac OS X.
(...) Rather, we strive to ensure binary
compatibility in each dynamically linked system library and framework
(AHAHAHAHAHAH, note by me)
Warning: Shipping a statically linked binary entails a significant compatibility risk.
We strongly recommend that you not do this..."
Short version: Apple does not like -static
g++ -Dunix -O3 zpaqfranz.cpp -o zpaqfranz -pthread
If you want to enable all kinds of warnings, turn on c++11.
Apple, by default, does NOT like "long long", at all.
As I have explained several times, I had to make compromises, such as the C++17 warnings.
g++ -Dunix -O3 zpaqfranz.cpp -o zpaqfranz -pthread -std=c++11 -Wall -Wpedantic

Mac PowerPC with gcc4.x
Look at -DBIG (for BIG ENDIAN) and -DANCIENT (old-compiler)
g++ -O3 -DBIG -DANCIENT -Dunix -DNOJIT zpaqfranz.cpp -o zpaqfranz -pthread

Apple Macintosh (M1/M2)
DO NOT FORGET THE -DNOJIT!!!!!
g++ -Dunix  -O3 -DNOJIT zpaqfranz.cpp -o zpaqfranz -pthread

ESXi (gcc 3.4.6)
Note: not fully developed ( extract() with minimum RAM needs to be implemented )
g++ -O3 -DESX zpaqfranz.cpp -o zpaqfranz6  -pthread -static -s

sparc64 (not tested)
try
-DALIGNMALLOC (+ other switches)

Haiku R1/beta4, 64 bit (gcc 11.2.0), hrev56721
Not very tested
g++ -O3 -Dunix zpaqfranz.cpp -o zpaqfranz  -pthread -static

Slackware 12.0 (32 bit)
gcc 4.1.2
g++ -O3 -DANCIENT -Dunix zpaqfranz.cpp -o zpaqfranz -pthread

Slackware64 14.0
gcc 5.3.0
clang 3.8.0
g++ -O3 -Dunix zpaqfranz.cpp -o zpaqfranz -pthread -std=c++11
clang++ -O3 -Dunix zpaqfranz.cpp -o zpaqfranz -pthread -std=c++11

Slackware64 15.0
gcc 11.2.0
g++ -O3 -Dunix zpaqfranz.cpp -o zpaqfranz -pthread -lstdc++ -lm
clang 13.0.0
clang++ -O3 -Dunix zpaqfranz.cpp -o zpaqfranz -pthread

Debian 7 (wheezy) on PowerPC
gcc 4.9.1
g++ -O3 -Dunix -DBIG -DNOJIT zpaqfranz.cpp -o zpaqfranz -pthread -std=c++11

Debian 11 cross compiling to QNAP's arm
gcc 10.2.1
apt-get install gcc-arm-linux-gnueabihf
apt-get install g++-arm-linux-gnueabihf
arm-linux-gnueabihf-gcc
arm-linux-gnueabihf-g++

arm-linux-gnueabihf-g++ -O3 -DNOJIT -DANCIENT zpaqfranz.cpp -o zpaqqnapv8 -static -pthread -s -Wno-psabi


DragonFlyBSD 6.4.0
gcc 8.3
****
Please note: you can get memory error, without -DNOJIT, on "strange" (non FreeBSD) machines
****
g++ -Dunix -O3 zpaqfranz.cpp -o zpaqfranz -pthread -static

NetBSD 10
gcc 10.5.0
****
Please note: you can get memory error, without -DNOJIT, on "strange" (non FreeBSD) machines
****
g++ -DHWSHA2 -Dunix -O3 zpaqfranz.cpp -o zpaqfranz -pthread -static


HPPA gcc version 14.2.0 (Debian 14.2.0-8)
This type of CPU is quite particular, often requiring memory alignment, 
which is not very compatible with the CRC-32 calculation function using 16-byte slices. 
In this case, the "classic" algorithm is used, much slower but expected to work. 
I don't have access to hardware for thorough testing.
g++ -O3 -DBIG -DALIGNMALLOC zpaqfranz.cpp -o zpaqfranz -pthread

PowerPC (Debian gcc 4.9.1)
g++ -O3 -DBIG -DANCIENT zpaqfranz.cpp -o zpaqfranz -pthread -static -s

Alpine Linux
Sorry, cannot make pthread work
Just run with -t1 (aka: multithread is disabled)

Windows XP 
Newer zpaqfranz32.exe (>=60.10) more or less works on XP
Please do not use "strange" things (ADS & whatever)


Beware of #definitions
g++ -dM -E - < /dev/null
sometimes __sun, sometimes not




      _______ ______  _____ _______   ______ _____ _      ______ 
     |__   __|  ____|/ ____|__   __| |  ____|_   _| |    |  ____|
        | |  | |__  | (___    | |    | |__    | | | |    | |__   
        | |  |  __|  \___ \   | |    |  __|   | | | |    |  __|  
        | |  | |____ ____) |  | |    | |     _| |_| |____| |____ 
        |_|  |______|_____/   |_|    |_|    |_____|______|______|
                                                                                     

zpaqfranz has an internal self-testing mechanism, aimed at 'strange' systems, 
i.e. with CPUs operating differently from Intel, such as Apple M1, M2, 
PowerPC, sparc, ARM, BIG endians and so on.

The command autotest -to extracts a binary file, 
contained within the source, a (Windows-created) .zpaq archive 
to check whether the ZPAQL code (during extraction) is processed correctly.

It is essentially an interoperability test between Windows 
(taken as a known good-working model) and the "weird" host system.

It is packed with 256 "shuffled" pieces of the Iliade
https://www.rodoni.ch/busoni/bibliotechina/nuovifiles/iliade_h/testo.htm

Cantami, o Diva, del Pelìde Achille
l'ira funesta che infiniti addusse
lutti agli Achei, molte anzi tempo all'Orco
generose travolse alme d'eroi...

From version 55.16, instead of using deduplicated pseudorandom files, 
I preferred plaintext, in order to dispel any doubts about the contents of 
the binary archive: the theoretical coverage is minor, but it does not matter.

The resulting archive (sha256.zpaq), compressed by -m5 on Windows,
with every filename == SHA-256(content),
is mime64-encoded and "splitted" into 4 strings,
becoming (inside the source code)

char extract_test1[]={"N2tT...
char extract_test2[]={"W1hY...
char extract_test3[]={"+Au5...
char extract_test4[]={"sFPj...

You can quickly double-check it this way (just in case...)

zpaqfranz autotest -to somewhere
zpaqfranz x somewhere/sha256.zpaq -to extracted

The sha256.zpaq should be this one
SHA-256: D90223FAEE2878D7854B9438864B4856A3C1F920C34EFB8C136A8949B54E5400 [            158.239]     sha256.zpaq

With these files inside
SHA-256: 00D478184C1851145A712B8054D04789DA164CDEE61EDB2240F124E0AC3501AA [             37.000]     00D478184C1851145A712B8054D04789DA164CDEE61EDB2240F124E0AC3501AA
SHA-256: 010CE956B14903A000536ACEB4B12CF503B3D84E15985A5F6C5648DC772D8B54 [             37.000]     010CE956B14903A000536ACEB4B12CF503B3D84E15985A5F6C5648DC772D8B54
SHA-256: 032EA211A2F1CA5F977A46BF3211FB9DF3DCBC29D13D6B1764EB4B2637819A6E [             37.000]     032EA211A2F1CA5F977A46BF3211FB9DF3DCBC29D13D6B1764EB4B2637819A6E
SHA-256: 0344D52DF32E001AC79B9EADEB75CD515A2325481B4A3EF5BD876B9A64915068 [             37.000]     0344D52DF32E001AC79B9EADEB75CD515A2325481B4A3EF5BD876B9A64915068
SHA-256: 078E164828817CAC9D7FF1CAE2F88C695C1059D495B17832522264E0CD8CAE86 [             37.000]     078E164828817CAC9D7FF1CAE2F88C695C1059D495B17832522264E0CD8CAE86
SHA-256: 0829C6BEA1EAEEB1303950F741079730134340B484F383F354882D81EBDCC350 [             37.000]     0829C6BEA1EAEEB1303950F741079730134340B484F383F354882D81EBDCC350
SHA-256: 08BE3D4900A6CC3E9BD4159A91EE2D86E130503A0995F21BA99DC3060F285687 [             37.000]     08BE3D4900A6CC3E9BD4159A91EE2D86E130503A0995F21BA99DC3060F285687
SHA-256: 0960E3A414FFC2FB928425C42E51992717C809A5FF4CF2F03F922D620E3A0C12 [             37.000]     0960E3A414FFC2FB928425C42E51992717C809A5FF4CF2F03F922D620E3A0C12
SHA-256: 0A179201C68752B062168579F5F2423FE0B76B7CEE519674DF018C3559FEA0DE [             37.000]     0A179201C68752B062168579F5F2423FE0B76B7CEE519674DF018C3559FEA0DE
SHA-256: 0A404D353DDE54CE863A235689D13F1F3FAEAABDFB0733B5E48787BF8B1ABAA0 [             37.000]     0A404D353DDE54CE863A235689D13F1F3FAEAABDFB0733B5E48787BF8B1ABAA0
SHA-256: 0A710AA592664A6A01C98338C46AE4F9364067CD43A224A77D4B47D02126C6D4 [             37.000]     0A710AA592664A6A01C98338C46AE4F9364067CD43A224A77D4B47D02126C6D4
SHA-256: 0B430875FEABCD1B1E20F0D4A25B6B080F16485ADEEE27A740A8CF4CE6EB5DFF [             37.000]     0B430875FEABCD1B1E20F0D4A25B6B080F16485ADEEE27A740A8CF4CE6EB5DFF
SHA-256: 0B7C69CDFDBF4D41A65EE9FCC9C290DA947C0FDDCDE7C0D8CAB20870150A97A9 [             37.000]     0B7C69CDFDBF4D41A65EE9FCC9C290DA947C0FDDCDE7C0D8CAB20870150A97A9
SHA-256: 0DC92BFBA489354B5BF53F8AADD17C9C4659524747F467FD85ADE369834B6379 [             37.000]     0DC92BFBA489354B5BF53F8AADD17C9C4659524747F467FD85ADE369834B6379
SHA-256: 0E082AFA0578C194400693A23B477BA8A9B309A5AF60E2BE54D8695D8BC08112 [             37.000]     0E082AFA0578C194400693A23B477BA8A9B309A5AF60E2BE54D8695D8BC08112
SHA-256: 0E505B858ED58BA66402789B512D136BBA22F16DF9CD22CACCABF122AA0B6962 [             37.000]     0E505B858ED58BA66402789B512D136BBA22F16DF9CD22CACCABF122AA0B6962
SHA-256: 0F5E547E80F985B3F3C13EF0658D3D3C376387FB0B67A9545F0DF09F012C8EE4 [             37.000]     0F5E547E80F985B3F3C13EF0658D3D3C376387FB0B67A9545F0DF09F012C8EE4
SHA-256: 108AAB75EE08118321A705601435EDCAC957120B70A0E2FF6AF292F1BC32B159 [             37.000]     108AAB75EE08118321A705601435EDCAC957120B70A0E2FF6AF292F1BC32B159
SHA-256: 10CCE2C6F78285C608158A5A0EFF34BF004E528E42FC646B485F5BA683403C13 [             37.000]     10CCE2C6F78285C608158A5A0EFF34BF004E528E42FC646B485F5BA683403C13
SHA-256: 10DEBF94E56498EB24725F471DE3CA7C0839A388DE130CDE923D20F10FF5366B [             37.000]     10DEBF94E56498EB24725F471DE3CA7C0839A388DE130CDE923D20F10FF5366B
SHA-256: 13355FF10342D3521A87570D377CFA94151697E335AEC6FDA898FE5EDB38EB60 [             37.000]     13355FF10342D3521A87570D377CFA94151697E335AEC6FDA898FE5EDB38EB60
SHA-256: 15C367F88C3A0550E10253DD69F76B7876D02FF72DEB028EEEBEFB5FD41C2B29 [             37.000]     15C367F88C3A0550E10253DD69F76B7876D02FF72DEB028EEEBEFB5FD41C2B29
SHA-256: 165120A494D7B6E6E9E3D5AC33AD00B0E5BA9362BBFF062CC2F8297D007D3790 [             37.000]     165120A494D7B6E6E9E3D5AC33AD00B0E5BA9362BBFF062CC2F8297D007D3790
SHA-256: 16C8EDAEA5AB72A5BECF00E3395908ABB727E2C005C34AACA9EC5A89E1F839BB [             37.000]     16C8EDAEA5AB72A5BECF00E3395908ABB727E2C005C34AACA9EC5A89E1F839BB
SHA-256: 1812B7923D633AA2C688A6FB2E2B9B56C61BE416C7CEFDD5C11C64D62655855F [             37.000]     1812B7923D633AA2C688A6FB2E2B9B56C61BE416C7CEFDD5C11C64D62655855F
SHA-256: 1B9E31E11D96A5EF19F71E5E005EF89C8343C9E49F053FD1E27B8B48FD107B13 [             37.000]     1B9E31E11D96A5EF19F71E5E005EF89C8343C9E49F053FD1E27B8B48FD107B13
SHA-256: 1BD84476ADFCEBD6459DC7FF6C4E8591A0196A461B7A16849E4825E8323A2C84 [             37.000]     1BD84476ADFCEBD6459DC7FF6C4E8591A0196A461B7A16849E4825E8323A2C84
SHA-256: 1D1155709A45EBA6FE76DA1BDE8130D4DA0B6621D9D17411E8D9F41490AF0ADD [             37.000]     1D1155709A45EBA6FE76DA1BDE8130D4DA0B6621D9D17411E8D9F41490AF0ADD
SHA-256: 1DF209DB3D40B3988B75122DCD45FD1D84DC87D4A21415DB1CB7E894D4755213 [             37.000]     1DF209DB3D40B3988B75122DCD45FD1D84DC87D4A21415DB1CB7E894D4755213
SHA-256: 1FC14CD3E1D394DB285B7113EFA5765455516C72C6B2ADD2710ABF810EBB5591 [             37.000]     1FC14CD3E1D394DB285B7113EFA5765455516C72C6B2ADD2710ABF810EBB5591
SHA-256: 1FD808BD5631008B0F4B10FB3050A017A9D6AFA03EC2F99186E8D1980E458153 [             37.000]     1FD808BD5631008B0F4B10FB3050A017A9D6AFA03EC2F99186E8D1980E458153
SHA-256: 210B63FA532B0CA36974F2E4208591CB84ABA9C599E8178A6FC5E5260E19353F [             37.000]     210B63FA532B0CA36974F2E4208591CB84ABA9C599E8178A6FC5E5260E19353F
SHA-256: 21B095AF58EE64FF3E75CAE759BF8D854E85830152ABDB50BF8E177DE2C38BB5 [             37.000]     21B095AF58EE64FF3E75CAE759BF8D854E85830152ABDB50BF8E177DE2C38BB5
SHA-256: 234CAB0BD56614D8891934E71891B0FBFE1781350EB25EBCE0ED9FA398EF8A5F [             37.000]     234CAB0BD56614D8891934E71891B0FBFE1781350EB25EBCE0ED9FA398EF8A5F
SHA-256: 24A966A5D474A7A850688563489FEF0DAEC981CB4F3CBB7E1767ABBEEDBE722E [             37.000]     24A966A5D474A7A850688563489FEF0DAEC981CB4F3CBB7E1767ABBEEDBE722E
SHA-256: 252B1DB9F19113AEE3575B805B4B6BA768B4FC93EDB05A8E6EC61650FDF39D2B [             37.000]     252B1DB9F19113AEE3575B805B4B6BA768B4FC93EDB05A8E6EC61650FDF39D2B
SHA-256: 25D8AEE1BDE16464F72E0A7BBA0550F0BD843D1FF1BA817D8877739A6FB0CDDA [             37.000]     25D8AEE1BDE16464F72E0A7BBA0550F0BD843D1FF1BA817D8877739A6FB0CDDA
SHA-256: 26BFB864EEC92283547953EB66009274DE9E25C89C15C11571543925AEB06CFC [             37.000]     26BFB864EEC92283547953EB66009274DE9E25C89C15C11571543925AEB06CFC
SHA-256: 26FD9CBE82F02D2F1D40EC7D61F64992475005B071555FF0F7891BC90ED07DFF [             37.000]     26FD9CBE82F02D2F1D40EC7D61F64992475005B071555FF0F7891BC90ED07DFF
SHA-256: 27074DA8B7C6D97E09092CA36C801AB00E00DFE89A2DBFE3D0F9064C7B538AA9 [             37.000]     27074DA8B7C6D97E09092CA36C801AB00E00DFE89A2DBFE3D0F9064C7B538AA9
SHA-256: 2756249F7862D948B8DADD7D6B94F2EABF616AE9BDD4FBB4E5D74B4F216988DF [             37.000]     2756249F7862D948B8DADD7D6B94F2EABF616AE9BDD4FBB4E5D74B4F216988DF
SHA-256: 27C54140CF47AA5B7D469FCABBBFEEA666D23C427A636C869350FCEA46C892B0 [             37.000]     27C54140CF47AA5B7D469FCABBBFEEA666D23C427A636C869350FCEA46C892B0
SHA-256: 28A1435A0ECC1A521150DDECE1E1F213B42CBC2FF754824A8A0177DE2A4319C4 [             37.000]     28A1435A0ECC1A521150DDECE1E1F213B42CBC2FF754824A8A0177DE2A4319C4
SHA-256: 2D8294F94A7B49E8A1CEEC70A9E204DAC69016ED681B07099370BBEB3D82352A [             37.000]     2D8294F94A7B49E8A1CEEC70A9E204DAC69016ED681B07099370BBEB3D82352A
SHA-256: 2F70BDF9416D3C0A2D00DC9526F8A95E0DF437B862B3DC8EE10418E58D73FADE [             37.000]     2F70BDF9416D3C0A2D00DC9526F8A95E0DF437B862B3DC8EE10418E58D73FADE
SHA-256: 3089FB37D22A22682CAA80F56B73DD7843A0C2567AB7373295F60340A8DE1038 [             37.000]     3089FB37D22A22682CAA80F56B73DD7843A0C2567AB7373295F60340A8DE1038
SHA-256: 30C5E6037FCCC63994DB761FAF3D4E7ED5BABFC3D62F9C560058791437C6EFE4 [             37.000]     30C5E6037FCCC63994DB761FAF3D4E7ED5BABFC3D62F9C560058791437C6EFE4
SHA-256: 31528C0C41EEA6E4234D1939DAC42BC389B6A9B1B2E4AA92918D30B7DF1587F2 [             37.000]     31528C0C41EEA6E4234D1939DAC42BC389B6A9B1B2E4AA92918D30B7DF1587F2
SHA-256: 341F785E400CABD0688623FF248496BDA776D979FD184A07EC2A17CB1F9D44D1 [             37.000]     341F785E400CABD0688623FF248496BDA776D979FD184A07EC2A17CB1F9D44D1
SHA-256: 3464F4CD9F1DE1E989FA8F07CB614A6324AF9121AE908A60137606CBE3866350 [             37.000]     3464F4CD9F1DE1E989FA8F07CB614A6324AF9121AE908A60137606CBE3866350
SHA-256: 36BBB282BD737FFE945AF89A7108B70E626A980E8EA7B9A28F595AFE1908576A [             37.000]     36BBB282BD737FFE945AF89A7108B70E626A980E8EA7B9A28F595AFE1908576A
SHA-256: 37C94F56F8193D38C53846C05ED64C325E3CE31B83FD579F6FE4424D80AE2990 [             37.000]     37C94F56F8193D38C53846C05ED64C325E3CE31B83FD579F6FE4424D80AE2990
SHA-256: 389CD2ACD8D2615C24233B16B6743B7CCFAF647D9071E3C6085F9407AE21EDA2 [             37.000]     389CD2ACD8D2615C24233B16B6743B7CCFAF647D9071E3C6085F9407AE21EDA2
SHA-256: 3900A9A0227F16E20D73349CC5C35071AA7F074FF126F50D6D688DC68390F19D [             37.000]     3900A9A0227F16E20D73349CC5C35071AA7F074FF126F50D6D688DC68390F19D
SHA-256: 39343E6C33FD0F90C17A97148F94EE829F0E4B1CB2BAD01193A1CFE220AAFE52 [             37.000]     39343E6C33FD0F90C17A97148F94EE829F0E4B1CB2BAD01193A1CFE220AAFE52
SHA-256: 39C3B48B173EA632FA8D5F024E3073E84F5027C4E7830DF3844CBDADD564C2B5 [             37.000]     39C3B48B173EA632FA8D5F024E3073E84F5027C4E7830DF3844CBDADD564C2B5
SHA-256: 3B195045FE2AD01D640130A57775C456F9535946DEE65F5D960C6D3DD31F9E2A [             37.000]     3B195045FE2AD01D640130A57775C456F9535946DEE65F5D960C6D3DD31F9E2A
SHA-256: 3B7F9DFDAC53BEF07CF14030DFB0196AB8245B079B7BFB1DBBAA453677D0EAC1 [             37.000]     3B7F9DFDAC53BEF07CF14030DFB0196AB8245B079B7BFB1DBBAA453677D0EAC1
SHA-256: 3BD4034DE33995B16866BD38101DD3E74CE7422CF4CA264C33C91D162A8F7B70 [             37.000]     3BD4034DE33995B16866BD38101DD3E74CE7422CF4CA264C33C91D162A8F7B70
SHA-256: 3BE3FD05735B114206ABAC244D2990BC031DE034968848C27E58441DB0157FB8 [             37.000]     3BE3FD05735B114206ABAC244D2990BC031DE034968848C27E58441DB0157FB8
SHA-256: 3BE556B87E0F6B64576646F8D1915DE5166F850730711A48A546589CA7A116F9 [             37.000]     3BE556B87E0F6B64576646F8D1915DE5166F850730711A48A546589CA7A116F9
SHA-256: 3D4E6466502B9E393D833C61FE220D63A503855D5C2AEF061DEBAD645307E6C2 [             37.000]     3D4E6466502B9E393D833C61FE220D63A503855D5C2AEF061DEBAD645307E6C2
SHA-256: 3D94BB464584A7AFFEC863D45FBE635E26E2E1936C76A280960D9FD51C85EB4D [             37.000]     3D94BB464584A7AFFEC863D45FBE635E26E2E1936C76A280960D9FD51C85EB4D
SHA-256: 3D98E7FC5B59489DE3FC5C2B39FAC5CAAD25F22E2FFD9DBF07C938029651FE80 [             37.000]     3D98E7FC5B59489DE3FC5C2B39FAC5CAAD25F22E2FFD9DBF07C938029651FE80
SHA-256: 3E4A8132599AED13297DB87E69861257AF0253F050EF9A791907A9F7760E1E09 [             37.000]     3E4A8132599AED13297DB87E69861257AF0253F050EF9A791907A9F7760E1E09
SHA-256: 3F3AFC1BC3DAD1A75AB79497BA4A7D795BC208DCF9CF8C3CCE01E75022D25C53 [             37.000]     3F3AFC1BC3DAD1A75AB79497BA4A7D795BC208DCF9CF8C3CCE01E75022D25C53
SHA-256: 3FA10C9DF65691A1686059CCF4EAB20BF5E4EBD360A6663CDF88AE80D24593D2 [             37.000]     3FA10C9DF65691A1686059CCF4EAB20BF5E4EBD360A6663CDF88AE80D24593D2
SHA-256: 446F672B5C291D11EE997A3BD6B0479C23179F28FB5D0D284CC35875727BDE73 [             37.000]     446F672B5C291D11EE997A3BD6B0479C23179F28FB5D0D284CC35875727BDE73
SHA-256: 447E42ADEB6C7EBCD3ED68DCB8B7C91729227B3482CCF2392EC8ECB41C702E8B [             37.000]     447E42ADEB6C7EBCD3ED68DCB8B7C91729227B3482CCF2392EC8ECB41C702E8B
SHA-256: 45441B49D27BB4D48D26253901488318700DDA5E64E7475D46CC69529A6E1B5D [             37.000]     45441B49D27BB4D48D26253901488318700DDA5E64E7475D46CC69529A6E1B5D
SHA-256: 45DF6FD68BF0B4AB153492FB7492BC7CC518EEB4D77999F6838667D0CBB612E0 [             37.000]     45DF6FD68BF0B4AB153492FB7492BC7CC518EEB4D77999F6838667D0CBB612E0
SHA-256: 462DA79124E977C9AFE38D773DE68178A14044B666C6CE3F58632DF2B64FE303 [             37.000]     462DA79124E977C9AFE38D773DE68178A14044B666C6CE3F58632DF2B64FE303
SHA-256: 4639FAE9FEB3AC00269AF6CFB8FDDA191C3FFD51E7958B2C36123856E4652DAA [             37.000]     4639FAE9FEB3AC00269AF6CFB8FDDA191C3FFD51E7958B2C36123856E4652DAA
SHA-256: 474E4473A1BA757D56456B70C1B45A09E5EEDA080882C527FAD642774D769891 [             37.000]     474E4473A1BA757D56456B70C1B45A09E5EEDA080882C527FAD642774D769891
SHA-256: 475EE529339349676C3C54CF7AB24B35D06163AED1888D23D8655F28AD6CA77C [             37.000]     475EE529339349676C3C54CF7AB24B35D06163AED1888D23D8655F28AD6CA77C
SHA-256: 47C6CD8953A795F630BC41CF3FAEF03742129CA779404C9E7384B73049EC710B [             37.000]     47C6CD8953A795F630BC41CF3FAEF03742129CA779404C9E7384B73049EC710B
SHA-256: 49BE9B32041ABAC22FFAA72A1DC14EF0E3EC5AC2BF2CB6A7276C1EA245FDEDFB [             37.000]     49BE9B32041ABAC22FFAA72A1DC14EF0E3EC5AC2BF2CB6A7276C1EA245FDEDFB
SHA-256: 4A9812764E2B1C34A22D298DA9BAB0032DB90C951BD890D903BE260A1842A0DF [             37.000]     4A9812764E2B1C34A22D298DA9BAB0032DB90C951BD890D903BE260A1842A0DF
SHA-256: 4BFDDC3ABF535FFFFE0597F3888C98836ABC39D263D965B56965309C1E97517A [             37.000]     4BFDDC3ABF535FFFFE0597F3888C98836ABC39D263D965B56965309C1E97517A
SHA-256: 4C39512F3778F92125BC22AE35CF128542B417B08FAE57BB9FA2B08BD53FB604 [             37.000]     4C39512F3778F92125BC22AE35CF128542B417B08FAE57BB9FA2B08BD53FB604
SHA-256: 4D54FCF77DBBA3143F67EDDB073C598D161E8E4EBFA6F1296D3EFF7FB14DA9C5 [             37.000]     4D54FCF77DBBA3143F67EDDB073C598D161E8E4EBFA6F1296D3EFF7FB14DA9C5
SHA-256: 50D7BF4E0E45BE449F054B756AE0D5BF615219AE40D46C63A5BEBC33AF525A73 [             37.000]     50D7BF4E0E45BE449F054B756AE0D5BF615219AE40D46C63A5BEBC33AF525A73
SHA-256: 519E27584C601906882D7CEC1BA6DBF7FE156A940A9DC39F6CDBECF43D5A1FDC [             37.000]     519E27584C601906882D7CEC1BA6DBF7FE156A940A9DC39F6CDBECF43D5A1FDC
SHA-256: 526911B84F03A0C25B4D5FC23DD45DA5AD54CF20581006AC97B270AE943ADCF3 [             37.000]     526911B84F03A0C25B4D5FC23DD45DA5AD54CF20581006AC97B270AE943ADCF3
SHA-256: 529CAAC78E5277E4D142F8261E53E6570C92ACA496F7A5DC8CF4DB674CFF9447 [             37.000]     529CAAC78E5277E4D142F8261E53E6570C92ACA496F7A5DC8CF4DB674CFF9447
SHA-256: 544C881F08F27C926F3AFFC81C0CA4AACDCB8A0B748B8B5D685BE690CFEF8D5A [             37.000]     544C881F08F27C926F3AFFC81C0CA4AACDCB8A0B748B8B5D685BE690CFEF8D5A
SHA-256: 548F141A26913632C46AA174A36CFBD40AB0D6A968E83054AF35A3B5CEFAE1BA [             37.000]     548F141A26913632C46AA174A36CFBD40AB0D6A968E83054AF35A3B5CEFAE1BA
SHA-256: 55D7218CA360C0967A8D3C012FDBFB1EDE923BB6DC87244C2F6BC2944D449AD3 [             37.000]     55D7218CA360C0967A8D3C012FDBFB1EDE923BB6DC87244C2F6BC2944D449AD3
SHA-256: 55E4CF5A088772EB94C4E6DDF0DB5381171F37E5B82ED363056CE22B426B13CB [             37.000]     55E4CF5A088772EB94C4E6DDF0DB5381171F37E5B82ED363056CE22B426B13CB
SHA-256: 5655700594E3E8057480DF1DB7395339FADEAD7600DE9FE1E704D3B37FDF3483 [             37.000]     5655700594E3E8057480DF1DB7395339FADEAD7600DE9FE1E704D3B37FDF3483
SHA-256: 568860708726F8F86A8C64118DD81E1977A1068D501FA2C108D29F0E680D321E [             37.000]     568860708726F8F86A8C64118DD81E1977A1068D501FA2C108D29F0E680D321E
SHA-256: 56FB8A04E25A63192E9E540BC9FC4FD8DD50789EFC58337B0A7F8526FE2F95FA [             37.000]     56FB8A04E25A63192E9E540BC9FC4FD8DD50789EFC58337B0A7F8526FE2F95FA
SHA-256: 57A53D9564669D01DCE99B44A012CCD503D5727868C7B6DC5E3976AEF7397351 [             37.000]     57A53D9564669D01DCE99B44A012CCD503D5727868C7B6DC5E3976AEF7397351
SHA-256: 57E24ECBA9F66747653EB16B09A4E252F6DDC4C1677C9A8547A283DF9A5CEB47 [             37.000]     57E24ECBA9F66747653EB16B09A4E252F6DDC4C1677C9A8547A283DF9A5CEB47
SHA-256: 585FF005A50313F25EDA6A7E8BF31BCB61543C13E4156233C38A9CFBF98A91B1 [             37.000]     585FF005A50313F25EDA6A7E8BF31BCB61543C13E4156233C38A9CFBF98A91B1
SHA-256: 5869F14D80E849BFCC43130983B64A9622FA1426E5CB4AD870E173AC5E2C881C [             37.000]     5869F14D80E849BFCC43130983B64A9622FA1426E5CB4AD870E173AC5E2C881C
SHA-256: 591F77D4E0D9A290D5928491661F6BC57FBB6F5D2D22C3CC21A2D82DBC0CB789 [             37.000]     591F77D4E0D9A290D5928491661F6BC57FBB6F5D2D22C3CC21A2D82DBC0CB789
SHA-256: 5A0E32FD143AD117EBE107823C0168BC04228AAF21703009C077A41B0FBF457D [             37.000]     5A0E32FD143AD117EBE107823C0168BC04228AAF21703009C077A41B0FBF457D
SHA-256: 5A55CA569AA57411613844F1E908E7356C03136ECED223512CC34998968E1201 [             37.000]     5A55CA569AA57411613844F1E908E7356C03136ECED223512CC34998968E1201
SHA-256: 5AE15A0204325884D4C9FDAF524E6A39019EEA61D177C2749A485C15960C51D2 [             37.000]     5AE15A0204325884D4C9FDAF524E6A39019EEA61D177C2749A485C15960C51D2
SHA-256: 5CF9EF180B74B15F6B12C23F6EEE683FF2E21188844D364907BFB61D96108528 [             37.000]     5CF9EF180B74B15F6B12C23F6EEE683FF2E21188844D364907BFB61D96108528
SHA-256: 5D631323E6C1CF993EB0722A5CECFC7B13E7FA34A60970841F39502543B9A553 [             37.000]     5D631323E6C1CF993EB0722A5CECFC7B13E7FA34A60970841F39502543B9A553
SHA-256: 5DC3470EBC48F08A5F09D48306CAFDBA04A91FEE69983E066D261AB51C9862CE [             37.000]     5DC3470EBC48F08A5F09D48306CAFDBA04A91FEE69983E066D261AB51C9862CE
SHA-256: 5DE49182F8A1D29C04201C681FC04980B53A06FE25319BCF874AEA24397323F8 [             37.000]     5DE49182F8A1D29C04201C681FC04980B53A06FE25319BCF874AEA24397323F8
SHA-256: 5E9ACB1D0E299FBE610A7BD551B4AB624F058B1AB1FDF0B05C75BCC963E8EEED [             37.000]     5E9ACB1D0E299FBE610A7BD551B4AB624F058B1AB1FDF0B05C75BCC963E8EEED
SHA-256: 5FB79AF55A64980E27C8EBEDDA7A65D53091B8558EDC01AEFEC34716420B26B5 [             37.000]     5FB79AF55A64980E27C8EBEDDA7A65D53091B8558EDC01AEFEC34716420B26B5
SHA-256: 61A20B5037DEEC703B9E0420437B9227D53E2CF10165CEF6A881A95988F3FD50 [             37.000]     61A20B5037DEEC703B9E0420437B9227D53E2CF10165CEF6A881A95988F3FD50
SHA-256: 62237E28DA9E8348BF4FD60816014DE340272EE3AF71D29AA846B2F17070FC60 [             37.000]     62237E28DA9E8348BF4FD60816014DE340272EE3AF71D29AA846B2F17070FC60
SHA-256: 632D576C07DC4F0387CE451E4C9485F561CA374BC2CC6B4B3544A546DED2679A [             37.000]     632D576C07DC4F0387CE451E4C9485F561CA374BC2CC6B4B3544A546DED2679A
SHA-256: 64C51FDE6036CFEACF8793B20EC596CB420A2CB91487101E4048DE120EC9FFE9 [             37.000]     64C51FDE6036CFEACF8793B20EC596CB420A2CB91487101E4048DE120EC9FFE9
SHA-256: 65300EB7090569287D264479CB84ACC2091867391F6151EC7CC17041DC4DBD9D [             37.000]     65300EB7090569287D264479CB84ACC2091867391F6151EC7CC17041DC4DBD9D
SHA-256: 659690FAA0059BD9487FC7081F057699774737F071C7081EB541A3B7329A4F47 [             37.000]     659690FAA0059BD9487FC7081F057699774737F071C7081EB541A3B7329A4F47
SHA-256: 65FDC24F04EFDE97623AD9A3B1EEBF4B1AE5C92BA1B7006C18690B6DF3192070 [             37.000]     65FDC24F04EFDE97623AD9A3B1EEBF4B1AE5C92BA1B7006C18690B6DF3192070
SHA-256: 6662229A32542A195E82D016C2CAA47C73733D13F395C7AB12DF838F19A7245B [             37.000]     6662229A32542A195E82D016C2CAA47C73733D13F395C7AB12DF838F19A7245B
SHA-256: 66A92DF3DFE7ECC7429FF033CF5BFBAE8F3B9D019403BA8BA0835B0F2CCEF98C [             37.000]     66A92DF3DFE7ECC7429FF033CF5BFBAE8F3B9D019403BA8BA0835B0F2CCEF98C
SHA-256: 67827811F2CB17E03415DE6E46EC2FE7561A7E07790FE1E97807C0F778760BCE [             37.000]     67827811F2CB17E03415DE6E46EC2FE7561A7E07790FE1E97807C0F778760BCE
SHA-256: 6AAAE32CBA8AA049FEAFE093E9EC9C4617534A7F37A4F90B81F6F27003C43C03 [             37.000]     6AAAE32CBA8AA049FEAFE093E9EC9C4617534A7F37A4F90B81F6F27003C43C03
SHA-256: 6B7273DD6F79E6F3D4353989FB07983B5F47DFEF3946909BD7DA990C74708002 [             37.000]     6B7273DD6F79E6F3D4353989FB07983B5F47DFEF3946909BD7DA990C74708002
SHA-256: 6DEDFAB01FE0573D34AF4546FFDC27341C985111473F6EB047731B3465AFCDCA [             37.000]     6DEDFAB01FE0573D34AF4546FFDC27341C985111473F6EB047731B3465AFCDCA
SHA-256: 706FED95A8978923F550C6C802F617A47C94CF7FA015353D59F01525D0109F7C [             37.000]     706FED95A8978923F550C6C802F617A47C94CF7FA015353D59F01525D0109F7C
SHA-256: 714D71D28B1ADE8FDEA1DBC297EE155FCF5318A97B0E561650C7B781778217BC [             37.000]     714D71D28B1ADE8FDEA1DBC297EE155FCF5318A97B0E561650C7B781778217BC
SHA-256: 7297DAF3C58DDDDEFCEF35296A90DD1DBFF8817682420F6B4E43BB2E1F02F153 [             37.000]     7297DAF3C58DDDDEFCEF35296A90DD1DBFF8817682420F6B4E43BB2E1F02F153
SHA-256: 72DAF5A9F35AEB68EE403C1E711ED408E2BF19D1D42DAD6BE68EDEDFA30E8F82 [             37.000]     72DAF5A9F35AEB68EE403C1E711ED408E2BF19D1D42DAD6BE68EDEDFA30E8F82
SHA-256: 744BABE76DE3DCB47E6EB868387591A543011514C842F8C1CA701E0247D9973A [             37.000]     744BABE76DE3DCB47E6EB868387591A543011514C842F8C1CA701E0247D9973A
SHA-256: 752571BDBB1D25A5AF56C96144E67BDCD8FDBA1CB2DA988C7727D5B4B71F1CC1 [             37.000]     752571BDBB1D25A5AF56C96144E67BDCD8FDBA1CB2DA988C7727D5B4B71F1CC1
SHA-256: 77161315B7C3E9510C66514B7D6B144381B5F738411E5F5885E7906BF65ADF8A [             37.000]     77161315B7C3E9510C66514B7D6B144381B5F738411E5F5885E7906BF65ADF8A
SHA-256: 78A1EA5E4AF17BDC4E322D1EB9753633C27C6B8846FA25CEF93A12532208F6A0 [             37.000]     78A1EA5E4AF17BDC4E322D1EB9753633C27C6B8846FA25CEF93A12532208F6A0
SHA-256: 7DBACBF41154D95C4D694AAA3579C0159DFB409FE0CA3529596554AC2C4EA256 [             37.000]     7DBACBF41154D95C4D694AAA3579C0159DFB409FE0CA3529596554AC2C4EA256
SHA-256: 7F028BAA27EA9043E3E263913A53CC5E4059A4F8F4D0E06932552FAA777A42CC [             37.000]     7F028BAA27EA9043E3E263913A53CC5E4059A4F8F4D0E06932552FAA777A42CC
SHA-256: 7F94A6F0857C988EB107EF6A65559DC7B37A4D9BCBBBB94564244D61B8AB4B77 [             37.000]     7F94A6F0857C988EB107EF6A65559DC7B37A4D9BCBBBB94564244D61B8AB4B77
SHA-256: 7FB17B349C66A0CD28A6A796BF262ADD149A70DF496E15177D5315CBA2EDF9A5 [             37.000]     7FB17B349C66A0CD28A6A796BF262ADD149A70DF496E15177D5315CBA2EDF9A5
SHA-256: 80BCC74AA133D46752E346EC74AA050227B247DC79624065719294D49D284AE9 [             37.000]     80BCC74AA133D46752E346EC74AA050227B247DC79624065719294D49D284AE9
SHA-256: 816BF2C6564FBD09FF2A2DE8B54684F19664F9D9274D87966C46FA768F0FB101 [             37.000]     816BF2C6564FBD09FF2A2DE8B54684F19664F9D9274D87966C46FA768F0FB101
SHA-256: 81858D5E5EFC2A41A99EBC871AC25FE622B4E2601F7896FCF7DC7CB7844946BB [             37.000]     81858D5E5EFC2A41A99EBC871AC25FE622B4E2601F7896FCF7DC7CB7844946BB
SHA-256: 82041513A6416E9105FB4BE6DDEC0A301B200E3D0A051DAB56DF599E7D058C33 [             37.000]     82041513A6416E9105FB4BE6DDEC0A301B200E3D0A051DAB56DF599E7D058C33
SHA-256: 83634D72BF480913A52F55DD7312273A4889E238BB31105FD6FF9292B9575E53 [             37.000]     83634D72BF480913A52F55DD7312273A4889E238BB31105FD6FF9292B9575E53
SHA-256: 85DA854914FC7AECB458C9887FE9E3E8EADB5D9A51729FD4023E12A2A62BA84B [             37.000]     85DA854914FC7AECB458C9887FE9E3E8EADB5D9A51729FD4023E12A2A62BA84B
SHA-256: 87432D9300A3D974061765BD5E46C21DBD773B7531CA1F58313FFF475D0D55F7 [             37.000]     87432D9300A3D974061765BD5E46C21DBD773B7531CA1F58313FFF475D0D55F7
SHA-256: 885537E91B77DA9E298B0827ED72951592BB0E6D48FBED9D079D7354335365CF [             37.000]     885537E91B77DA9E298B0827ED72951592BB0E6D48FBED9D079D7354335365CF
SHA-256: 885F1E5C7D3D223B10DCB8A986D7F2E443E05AF2EB7B39A815F16BE6AF8A55AC [             37.000]     885F1E5C7D3D223B10DCB8A986D7F2E443E05AF2EB7B39A815F16BE6AF8A55AC
SHA-256: 8869B76663373A64CE35E531AA3BF3248F8919C99BFE72D8925A4761BD292981 [             37.000]     8869B76663373A64CE35E531AA3BF3248F8919C99BFE72D8925A4761BD292981
SHA-256: 889A86F182D19AACA016F89E8F8A12508FEDF87A387D0A2F4314CB240C205AB2 [             37.000]     889A86F182D19AACA016F89E8F8A12508FEDF87A387D0A2F4314CB240C205AB2
SHA-256: 89FDFCB3293555F11F9B1F8D3F7F7039AB49C5772131DE80FD50BD26575F8FDA [             37.000]     89FDFCB3293555F11F9B1F8D3F7F7039AB49C5772131DE80FD50BD26575F8FDA
SHA-256: 8A20C46725F9E78C8DAAE2D749018CE4122C55976CFEE975800B8EB945AE64D3 [             37.000]     8A20C46725F9E78C8DAAE2D749018CE4122C55976CFEE975800B8EB945AE64D3
SHA-256: 8DD92BF97911C490E4AB5CCEB75DC9D3FAF32933EC8B480C2369F8CDF570187F [             37.000]     8DD92BF97911C490E4AB5CCEB75DC9D3FAF32933EC8B480C2369F8CDF570187F
SHA-256: 8DE9D087F2067E5127162F010A0E2053A597DDE796545C7A2C0E52B047E06351 [             37.000]     8DE9D087F2067E5127162F010A0E2053A597DDE796545C7A2C0E52B047E06351
SHA-256: 8E330005B0A7F402F2C6C9ACDB2B029875422B30FCE6F8C08186612E6C3512F8 [             37.000]     8E330005B0A7F402F2C6C9ACDB2B029875422B30FCE6F8C08186612E6C3512F8
SHA-256: 8F9F0F05E638D6D1A1E7BCAE7EA837C8113FEB54790F36F1EB3870933DD0A407 [             37.000]     8F9F0F05E638D6D1A1E7BCAE7EA837C8113FEB54790F36F1EB3870933DD0A407
SHA-256: 8FC82F03F95CE9FFF6A2E781415E0A903B0DEFF790C1F36FDDBE6F3D56FCD6F5 [             37.000]     8FC82F03F95CE9FFF6A2E781415E0A903B0DEFF790C1F36FDDBE6F3D56FCD6F5
SHA-256: 8FD5B0F4F0F3CF882D70723C1432ABCA933A94E2A847BE7EA9314E61F552A195 [             37.000]     8FD5B0F4F0F3CF882D70723C1432ABCA933A94E2A847BE7EA9314E61F552A195
SHA-256: 914671B460546BAF8D841BD9703176C02150590717D56124EF8B4BE9AA3E1CF7 [             37.000]     914671B460546BAF8D841BD9703176C02150590717D56124EF8B4BE9AA3E1CF7
SHA-256: 92244AB4F4AF52B7C3E5F256A924A5D1C9ED323B52CC3F14A23AEA503FA3BBC7 [             37.000]     92244AB4F4AF52B7C3E5F256A924A5D1C9ED323B52CC3F14A23AEA503FA3BBC7
SHA-256: 92F7BDEE518157ECD18968BE6E4AC17B2666FBB2A47789EFD84F05F21691200E [             37.000]     92F7BDEE518157ECD18968BE6E4AC17B2666FBB2A47789EFD84F05F21691200E
SHA-256: 934123B3BF2CE4BE47AE36BBDE8F9D23D77EAEAFA2A8B2ACD61D7AE8F577DED4 [             37.000]     934123B3BF2CE4BE47AE36BBDE8F9D23D77EAEAFA2A8B2ACD61D7AE8F577DED4
SHA-256: 93BED1842035886A3BA8BF295E35B9D9F6CD10CC3711D1DD82E851A2BD3CCE58 [             37.000]     93BED1842035886A3BA8BF295E35B9D9F6CD10CC3711D1DD82E851A2BD3CCE58
SHA-256: 93E35FD1D23278E82F57DE9C3281C9CA22622F60D8A7EDC59A4CACFF7149D009 [             37.000]     93E35FD1D23278E82F57DE9C3281C9CA22622F60D8A7EDC59A4CACFF7149D009
SHA-256: 9444E16862B2A46EB999A2E23109002F2DD63AC9F8BD3830CDADFBE414ABC38B [             37.000]     9444E16862B2A46EB999A2E23109002F2DD63AC9F8BD3830CDADFBE414ABC38B
SHA-256: 952F5E3F51D123B7F18CE5999C7261404C2BD99CCFEE31FCD7C71579F27F51F8 [             37.000]     952F5E3F51D123B7F18CE5999C7261404C2BD99CCFEE31FCD7C71579F27F51F8
SHA-256: 95AEFF7C413D4180BF1D19893F66E13DBE212DCD341349230921F8E91ECB7575 [             37.000]     95AEFF7C413D4180BF1D19893F66E13DBE212DCD341349230921F8E91ECB7575
SHA-256: 9B92039B3A8800234BF0A30D5B76C3DF717D5BC3875F9765214711851425A899 [             37.000]     9B92039B3A8800234BF0A30D5B76C3DF717D5BC3875F9765214711851425A899
SHA-256: 9C0BF94D6AEFCF1A145E52770998E29C56E341923FFB81C5ED8723FA18F59B52 [             37.000]     9C0BF94D6AEFCF1A145E52770998E29C56E341923FFB81C5ED8723FA18F59B52
SHA-256: 9F01E7BC4C2296127E12A99EB2D632EB7336B7F8FD3FB338B380C607427D8268 [             37.000]     9F01E7BC4C2296127E12A99EB2D632EB7336B7F8FD3FB338B380C607427D8268
SHA-256: A0F57829AFA6C0CEA1BABA37FF73DC9CE114B5D0D15A5A010D12C4F9F83C7AD3 [             37.000]     A0F57829AFA6C0CEA1BABA37FF73DC9CE114B5D0D15A5A010D12C4F9F83C7AD3
SHA-256: A1245F01239FA30E0D833AB8078065BFF8DE3F5DD79803A334C8872E7FFC6306 [             37.000]     A1245F01239FA30E0D833AB8078065BFF8DE3F5DD79803A334C8872E7FFC6306
SHA-256: A3D6E0C3733A036E06528F3762E89AECE4298D0602A3CF1BC8747951D3FE466F [             37.000]     A3D6E0C3733A036E06528F3762E89AECE4298D0602A3CF1BC8747951D3FE466F
SHA-256: A48845D5582B2D884F159DFCE86B4664E584E1D3CE5ED203A710D88DB0C855B6 [             37.000]     A48845D5582B2D884F159DFCE86B4664E584E1D3CE5ED203A710D88DB0C855B6
SHA-256: A4B2756FA53988A9663BE419189AC5F39FCB1D4E88FCA6573FD62F69075DDD45 [             37.000]     A4B2756FA53988A9663BE419189AC5F39FCB1D4E88FCA6573FD62F69075DDD45
SHA-256: A4F387B0878F07F4D893180CBE03682B799E915E8C0B47A0A8CDDD580B273E67 [             37.000]     A4F387B0878F07F4D893180CBE03682B799E915E8C0B47A0A8CDDD580B273E67
SHA-256: A5F6F944B998606D0AA018081B05DF7128C0CE3564DE525AD97A762DA5BEB235 [             37.000]     A5F6F944B998606D0AA018081B05DF7128C0CE3564DE525AD97A762DA5BEB235
SHA-256: A6A354D4D57412DC2D322991A6384274A1AC875B264A0BE35066918656899C20 [             37.000]     A6A354D4D57412DC2D322991A6384274A1AC875B264A0BE35066918656899C20
SHA-256: A748FA178ED7A3797150AA1FD6171B62F870D77AA12C1A93CA995982F737C348 [             37.000]     A748FA178ED7A3797150AA1FD6171B62F870D77AA12C1A93CA995982F737C348
SHA-256: A7FCF9987603F0744E163C71A97723A83BD8DED7D29DEC9CAA5A905FE5AE4EA6 [             37.000]     A7FCF9987603F0744E163C71A97723A83BD8DED7D29DEC9CAA5A905FE5AE4EA6
SHA-256: A82EAF353C17B6C91221752985C6E193FE44A4F798E8C757D0058970A378CDC1 [             37.000]     A82EAF353C17B6C91221752985C6E193FE44A4F798E8C757D0058970A378CDC1
SHA-256: A86AE1B47BF3F390D361CC4D97E4544695664BD5B77607AE9A987F847F205BAC [             37.000]     A86AE1B47BF3F390D361CC4D97E4544695664BD5B77607AE9A987F847F205BAC
SHA-256: A871EF3DE9F21D0192F34B624C414C73A4368FF1F6AD1A5297337FC68380ACE4 [             37.000]     A871EF3DE9F21D0192F34B624C414C73A4368FF1F6AD1A5297337FC68380ACE4
SHA-256: A9056DDF0D40648EFA52BFBBBC32E752E5059106F87E86F58DC2DF0B6B93FE15 [             37.000]     A9056DDF0D40648EFA52BFBBBC32E752E5059106F87E86F58DC2DF0B6B93FE15
SHA-256: AA463359197B0BE43B1A101EC200220230BC688FA6941DDB726456B1F0422804 [             37.000]     AA463359197B0BE43B1A101EC200220230BC688FA6941DDB726456B1F0422804
SHA-256: ABED942532E2E32FF3A1296CBE307C651AF4C906D9980DC740615516F457AF4A [             37.000]     ABED942532E2E32FF3A1296CBE307C651AF4C906D9980DC740615516F457AF4A
SHA-256: ADDD5910D7071C29AB1507F4971E673CF2D30FABF403793AC5B85B36DE94F975 [             37.000]     ADDD5910D7071C29AB1507F4971E673CF2D30FABF403793AC5B85B36DE94F975
SHA-256: ADF51921DA80A6BBE7B30D8CC7CB94174E3E7EBAEF8DAD44389C35882A0220BE [             37.000]     ADF51921DA80A6BBE7B30D8CC7CB94174E3E7EBAEF8DAD44389C35882A0220BE
SHA-256: AF230BB829E24655DD3654736A4F08025CEC27A2015DC80847481F72DF5465F4 [             37.000]     AF230BB829E24655DD3654736A4F08025CEC27A2015DC80847481F72DF5465F4
SHA-256: B15C62466B0AB8C4DB14A8D80DA6808EEF720F7F09AEB66663A61A1BA0533EC3 [             37.000]     B15C62466B0AB8C4DB14A8D80DA6808EEF720F7F09AEB66663A61A1BA0533EC3
SHA-256: B1F23843C8F238C08D4AEFCD64FED8C26525F826CEDBE5D15DA0C88F8C74F92A [             37.000]     B1F23843C8F238C08D4AEFCD64FED8C26525F826CEDBE5D15DA0C88F8C74F92A
SHA-256: B40B640D8D750FB3E19CDF3A86D51D7065497DF0C043B4F7778681DFE4AB7FDF [             37.000]     B40B640D8D750FB3E19CDF3A86D51D7065497DF0C043B4F7778681DFE4AB7FDF
SHA-256: B4F0CE880294117909FAD195C1132004CEA587E68592C7249EA7913DC4041D14 [             37.000]     B4F0CE880294117909FAD195C1132004CEA587E68592C7249EA7913DC4041D14
SHA-256: B587496E5038553959A20FD1286808608EC0F79C993E07C245845FF7C8C83626 [             37.000]     B587496E5038553959A20FD1286808608EC0F79C993E07C245845FF7C8C83626
SHA-256: B667B42A317718CB5294547E6A8DFA2C461EAFCE5D81409B8BD766228241FCAE [             37.000]     B667B42A317718CB5294547E6A8DFA2C461EAFCE5D81409B8BD766228241FCAE
SHA-256: B82AB78154C96B5B5BF682604FB8C256C20A3D894C6199DF7A277304D0C390FD [             37.000]     B82AB78154C96B5B5BF682604FB8C256C20A3D894C6199DF7A277304D0C390FD
SHA-256: B872FD54019BE3BCCF40D3C2E0805DF8832BABF5FA79A6ABCBD6224DB73B3AA8 [             37.000]     B872FD54019BE3BCCF40D3C2E0805DF8832BABF5FA79A6ABCBD6224DB73B3AA8
SHA-256: B8CC07752DF266ED084D4C58991CD1D7C486B7256C5ABBE30195954C5393AB22 [             37.000]     B8CC07752DF266ED084D4C58991CD1D7C486B7256C5ABBE30195954C5393AB22
SHA-256: BB8E03BA705D39FF477686157CEE4689BC4F2FF0F8AFA9B6EC22E7B76009C766 [             37.000]     BB8E03BA705D39FF477686157CEE4689BC4F2FF0F8AFA9B6EC22E7B76009C766
SHA-256: BBAB47571909E31E3E49F1B90FEB935375FCB58C0E58C28A788FF2158107BDD5 [             37.000]     BBAB47571909E31E3E49F1B90FEB935375FCB58C0E58C28A788FF2158107BDD5
SHA-256: BF24DAE09EC91C18E808C9871F8C1736C432F97034B329AC15411CFA2316B51D [             37.000]     BF24DAE09EC91C18E808C9871F8C1736C432F97034B329AC15411CFA2316B51D
SHA-256: C0060FC19C32E45CE256244FF499738B9C0FE8BF4C13D38758AB109A712E7D99 [             37.000]     C0060FC19C32E45CE256244FF499738B9C0FE8BF4C13D38758AB109A712E7D99
SHA-256: C0C9527F6A605DB0CEB46C8D791FE313ADB74C183FB24E3919953C65F4A3153B [             37.000]     C0C9527F6A605DB0CEB46C8D791FE313ADB74C183FB24E3919953C65F4A3153B
SHA-256: C0D4AB0D2F0D81E31015BB76F6B0E8C66E06DE52A976C4F81FAB103F39C4B803 [             37.000]     C0D4AB0D2F0D81E31015BB76F6B0E8C66E06DE52A976C4F81FAB103F39C4B803
SHA-256: C13A2143351DC5344A1E0DC1D397B8EACFC3E768C2CB35325B08EC7312ECEFDA [             37.000]     C13A2143351DC5344A1E0DC1D397B8EACFC3E768C2CB35325B08EC7312ECEFDA
SHA-256: C380DDE9106121E656CF93D643E4CF4AFC9519C6C48543BE9BD764CC264EF23A [             37.000]     C380DDE9106121E656CF93D643E4CF4AFC9519C6C48543BE9BD764CC264EF23A
SHA-256: C5A1F211BD2846572B77F47BDA91CE5BBEB1518AA58B4B9EBD71A8E390549F52 [             37.000]     C5A1F211BD2846572B77F47BDA91CE5BBEB1518AA58B4B9EBD71A8E390549F52
SHA-256: C5E23D66F360B83F58ADAFEA97175AB3CE772A3BE3A2DA54E7632DC262FEC745 [             37.000]     C5E23D66F360B83F58ADAFEA97175AB3CE772A3BE3A2DA54E7632DC262FEC745
SHA-256: C8A2570459BA91EF1BDCD43EF0894D192611F52067089939D1B0A35EEA6FB467 [             37.000]     C8A2570459BA91EF1BDCD43EF0894D192611F52067089939D1B0A35EEA6FB467
SHA-256: C8DC131BC15B7ECB331FD11FB7B1893134DFCF27A8FB38B2751D65CA60FE6D5C [             37.000]     C8DC131BC15B7ECB331FD11FB7B1893134DFCF27A8FB38B2751D65CA60FE6D5C
SHA-256: C9BF9A57CB0CC14E1D8DC5CBE7D648666C11A24DCEE0EC755B50FA0C8E5112A6 [             37.000]     C9BF9A57CB0CC14E1D8DC5CBE7D648666C11A24DCEE0EC755B50FA0C8E5112A6
SHA-256: CA45B6A7A32709EAC16B758A4D9224B24C05D7A3A55EBC60CA3590C0E1FC2AD0 [             37.000]     CA45B6A7A32709EAC16B758A4D9224B24C05D7A3A55EBC60CA3590C0E1FC2AD0
SHA-256: CC98269F1547CAC14F6E18D907CA3020084B2B43EED33C027EA7C4E0CADFC632 [             37.000]     CC98269F1547CAC14F6E18D907CA3020084B2B43EED33C027EA7C4E0CADFC632
SHA-256: CDD0AA1D9AA5CCA4F1DC1DE5436659181FFF2E647166F9BF1AC6F18801431B8D [             37.000]     CDD0AA1D9AA5CCA4F1DC1DE5436659181FFF2E647166F9BF1AC6F18801431B8D
SHA-256: D0F51667E9EDF939CAAD49A55993C24AC4996B9AFE38B4B68B7F06B243307278 [             37.000]     D0F51667E9EDF939CAAD49A55993C24AC4996B9AFE38B4B68B7F06B243307278
SHA-256: D15199382345ED1D1505662582522E3FABE974567FFFF1B1AFE03C7AAC578D9F [             37.000]     D15199382345ED1D1505662582522E3FABE974567FFFF1B1AFE03C7AAC578D9F
SHA-256: D2FB130705BDE5240BB48844CC0D6AC5B37ADC6353E3057CF44A7304EFCD2CF7 [             37.000]     D2FB130705BDE5240BB48844CC0D6AC5B37ADC6353E3057CF44A7304EFCD2CF7
SHA-256: D424E0F8DEC9C4BA53D9824105B5A40F23F4C369A55EA9E63368F1ADA2642431 [             37.000]     D424E0F8DEC9C4BA53D9824105B5A40F23F4C369A55EA9E63368F1ADA2642431
SHA-256: D62F41ACA203C670D46A85010CE8B0FC4996DD8C7FCB0DF4CE2D2DE0BC1D0AA1 [             37.000]     D62F41ACA203C670D46A85010CE8B0FC4996DD8C7FCB0DF4CE2D2DE0BC1D0AA1
SHA-256: D75FB5E2F418E684E72F60771A479B1A1E1A0A42BE0E3379CA1DCEA753A8755B [             37.000]     D75FB5E2F418E684E72F60771A479B1A1E1A0A42BE0E3379CA1DCEA753A8755B
SHA-256: D762DB5943A11BDD85D0697FC990C36AD98AA781C027416C999185C9DE9666E6 [             37.000]     D762DB5943A11BDD85D0697FC990C36AD98AA781C027416C999185C9DE9666E6
SHA-256: D81E6568DDDA59E0D3C8638F7E854E97748526E55F5E9AB5EE4114008058027D [             37.000]     D81E6568DDDA59E0D3C8638F7E854E97748526E55F5E9AB5EE4114008058027D
SHA-256: D85D097158BA15C9C1246B8DE51A9D7DD3276C46108DF1C945FB43E2068D749C [             37.000]     D85D097158BA15C9C1246B8DE51A9D7DD3276C46108DF1C945FB43E2068D749C
SHA-256: D8FD9797588F78F9462B866A8E2253ABAFBE231819C695E0CE2A168293F2D77C [             37.000]     D8FD9797588F78F9462B866A8E2253ABAFBE231819C695E0CE2A168293F2D77C
SHA-256: D91BD71E5311E75B07FEFAE4E9D1ECB07905C8CD922D614273C1ADF5DA980B6A [             37.000]     D91BD71E5311E75B07FEFAE4E9D1ECB07905C8CD922D614273C1ADF5DA980B6A
SHA-256: D95B7207AA116672A69633377E9007D21C1E67CF34BC215B590B91B4E7253CD1 [             37.000]     D95B7207AA116672A69633377E9007D21C1E67CF34BC215B590B91B4E7253CD1
SHA-256: DB1710EB7117F11B40DC669CC9E2346BD9312134294F692EE285971A213B7D36 [             37.000]     DB1710EB7117F11B40DC669CC9E2346BD9312134294F692EE285971A213B7D36
SHA-256: DB483F559D0CB9C7CCDAE01560050E0B8F8C64D2C36422BCDC8FA5DB583BBFF7 [             37.000]     DB483F559D0CB9C7CCDAE01560050E0B8F8C64D2C36422BCDC8FA5DB583BBFF7
SHA-256: DBE16FA616F8FD925855D1F76E90E05CF9FD379C85F5B132530D3B827F785460 [             37.000]     DBE16FA616F8FD925855D1F76E90E05CF9FD379C85F5B132530D3B827F785460
SHA-256: DC0DE21802BC89D3C39E82A26C3CED1487F56448ACC5255D54723891F4E44E8C [             37.000]     DC0DE21802BC89D3C39E82A26C3CED1487F56448ACC5255D54723891F4E44E8C
SHA-256: DC4B3997FECBCB014212E627CDD65799A52EE7A96D8C3C3E5ADB784CC8826247 [             37.000]     DC4B3997FECBCB014212E627CDD65799A52EE7A96D8C3C3E5ADB784CC8826247
SHA-256: DC62148AC754739D5F1864B600CDBD36178712A61DA050443A8C58D6CD130F4C [             37.000]     DC62148AC754739D5F1864B600CDBD36178712A61DA050443A8C58D6CD130F4C
SHA-256: DD2B293A54DE03F9DBD26504F3B5D77AC3BB917707CAA4E64D8D05D143DDB2F3 [             37.000]     DD2B293A54DE03F9DBD26504F3B5D77AC3BB917707CAA4E64D8D05D143DDB2F3
SHA-256: DD4D6B2C054230472B7B051999EEE80ABABF768CC3B9B4CB8695BD3F2A5537F8 [             37.000]     DD4D6B2C054230472B7B051999EEE80ABABF768CC3B9B4CB8695BD3F2A5537F8
SHA-256: DE029F18D92F6686ACF9228A1A4C2008585D56C6C7F3E0396414D52DCA2F4198 [             37.000]     DE029F18D92F6686ACF9228A1A4C2008585D56C6C7F3E0396414D52DCA2F4198
SHA-256: DF0B4CC2CFD2DC2E581B617DF9D63434CB7D3AF9952D8BB5F8EE4892609DA372 [             37.000]     DF0B4CC2CFD2DC2E581B617DF9D63434CB7D3AF9952D8BB5F8EE4892609DA372
SHA-256: DFF6560BCB4F4859925DBD3F2901730390643935F14BCEA9C063E3D198066DE8 [             37.000]     DFF6560BCB4F4859925DBD3F2901730390643935F14BCEA9C063E3D198066DE8
SHA-256: E34E6815F63AAFD8310235374723E00C8004363067CA22670FC90D3342AE3DCA [             37.000]     E34E6815F63AAFD8310235374723E00C8004363067CA22670FC90D3342AE3DCA
SHA-256: E3A0ADE5348623E173F8E773321B7154CBF6F28EF0AF0930D650D09E63F41264 [             37.000]     E3A0ADE5348623E173F8E773321B7154CBF6F28EF0AF0930D650D09E63F41264
SHA-256: E3D797919BFAEDD334F332658AC09FEAED07FEABBB39D2F01B97FF18C70950CB [             37.000]     E3D797919BFAEDD334F332658AC09FEAED07FEABBB39D2F01B97FF18C70950CB
SHA-256: E470CEACA5FA7F4463329471E95270B555CFAB56202687404CFC23E84E3C3707 [             37.000]     E470CEACA5FA7F4463329471E95270B555CFAB56202687404CFC23E84E3C3707
SHA-256: E81276941FB243AEFFC9447B4A57E3FFF8F4E218EAE62D337A8602E67ADF849E [             37.000]     E81276941FB243AEFFC9447B4A57E3FFF8F4E218EAE62D337A8602E67ADF849E
SHA-256: E82738BD0D9105A81628EF0E2A6DEAA4FEC0B2F2828E45E8D8E87D736E549771 [             37.000]     E82738BD0D9105A81628EF0E2A6DEAA4FEC0B2F2828E45E8D8E87D736E549771
SHA-256: E90A9D5D5C1B195FC88FB6B222534C150F6D2DC5C4496E40F79AF80B784436C2 [             37.000]     E90A9D5D5C1B195FC88FB6B222534C150F6D2DC5C4496E40F79AF80B784436C2
SHA-256: E9F28C520987B842258F4D1F0ECD3004F991BB145FD8C011A4330A3E326FBBC9 [             37.000]     E9F28C520987B842258F4D1F0ECD3004F991BB145FD8C011A4330A3E326FBBC9
SHA-256: EB1E222D6B1B8EF70705FACB4F120CC8B1D1816A4AD058B4FE0492D91A132C30 [             37.000]     EB1E222D6B1B8EF70705FACB4F120CC8B1D1816A4AD058B4FE0492D91A132C30
SHA-256: EB4676576D04D2F10B4CE72288FA6E5DF5B1F6274604F521B9CDE01E5E0ACF15 [             37.000]     EB4676576D04D2F10B4CE72288FA6E5DF5B1F6274604F521B9CDE01E5E0ACF15
SHA-256: EBE41D6D35E597FFBE8F7C17929A9A27350F34538FAB8B5A8F75A961EF5373AF [             37.000]     EBE41D6D35E597FFBE8F7C17929A9A27350F34538FAB8B5A8F75A961EF5373AF
SHA-256: ED169E0BF884A9A7DF1CD23B164CCBAAA4C3DFC0B0E962DF69836E3541915167 [             37.000]     ED169E0BF884A9A7DF1CD23B164CCBAAA4C3DFC0B0E962DF69836E3541915167
SHA-256: ED762370C31777D251DD92F4CC43D5D9468CFDF237465F68A1F18C1859A17E24 [             37.000]     ED762370C31777D251DD92F4CC43D5D9468CFDF237465F68A1F18C1859A17E24
SHA-256: F20E17924F71F116A87A28648433A61543C06F51ED06EFF4CFB1407D8FDE9EA5 [             37.000]     F20E17924F71F116A87A28648433A61543C06F51ED06EFF4CFB1407D8FDE9EA5
SHA-256: F30A039BEA38C80014B9CE80F54533FC314D279B73E689046602CD87E9BA4CED [             37.000]     F30A039BEA38C80014B9CE80F54533FC314D279B73E689046602CD87E9BA4CED
SHA-256: F326C6AB37993AF0E23316711BCC5CBC5C2D47D7D3519AA5F2ADAE04DDE71945 [             37.000]     F326C6AB37993AF0E23316711BCC5CBC5C2D47D7D3519AA5F2ADAE04DDE71945
SHA-256: F33D356CCDA7C72B4FAC2BED2837E2D8D7FD116191C4D950BDA431011CCA0F76 [             37.000]     F33D356CCDA7C72B4FAC2BED2837E2D8D7FD116191C4D950BDA431011CCA0F76
SHA-256: F3825855F1485C6EB7A0DDA19FC097793B9FEADB813CD10EDD8A45B58D65BD97 [             37.000]     F3825855F1485C6EB7A0DDA19FC097793B9FEADB813CD10EDD8A45B58D65BD97
SHA-256: F39C5C8EC37EACC3E95FA51217E8A021B9975F7ED3459D6CE2DBA94BC8AC71B8 [             37.000]     F39C5C8EC37EACC3E95FA51217E8A021B9975F7ED3459D6CE2DBA94BC8AC71B8
SHA-256: F48C3742202FE05AD86E4F112279E87D7545C39A0FDCA1F5600E5C7BCEAF327A [             37.000]     F48C3742202FE05AD86E4F112279E87D7545C39A0FDCA1F5600E5C7BCEAF327A
SHA-256: F589883E201672661A78D6FF21C4B3B749AF5853FC59BFFD940035069A68DD1D [             37.000]     F589883E201672661A78D6FF21C4B3B749AF5853FC59BFFD940035069A68DD1D
SHA-256: F6706FA113237E866DE7B7E4D79BE98F21C46A93D2B8A98AE0316C2A10C3AE0E [             37.000]     F6706FA113237E866DE7B7E4D79BE98F21C46A93D2B8A98AE0316C2A10C3AE0E
SHA-256: F88D8CCC7B844F640EA58D67BA8213BC657AD5C08C97B741CAE8B1BD1B8FF205 [             37.000]     F88D8CCC7B844F640EA58D67BA8213BC657AD5C08C97B741CAE8B1BD1B8FF205
SHA-256: FB4AF47721D4449692F3D1339BCBEC8578A7FE53BA01AD3B1335764BFFEB60A0 [             37.000]     FB4AF47721D4449692F3D1339BCBEC8578A7FE53BA01AD3B1335764BFFEB60A0
SHA-256: FB732127DB2B907DCEF4E87D8979146A43FA7AD915DD7587A8A890455F51FCC1 [             37.000]     FB732127DB2B907DCEF4E87D8979146A43FA7AD915DD7587A8A890455F51FCC1
SHA-256: FCA14D3C0B8CED91ACE33CCD96A13079E054311094D0B151BCB75622D768BADC [             37.000]     FCA14D3C0B8CED91ACE33CCD96A13079E054311094D0B151BCB75622D768BADC
SHA-256: FCCE109C8360963EB18975B94BDBE434BE1A49D3F53BDD768A99093B3EB838D2 [             37.000]     FCCE109C8360963EB18975B94BDBE434BE1A49D3F53BDD768A99093B3EB838D2

								  
*/

/*
	Here starts the program.
	
	zpaqfranz can use a backwards compatible archive format, using various "packages"
	Each FRANZOFFSET occupies, for each archived file, a certain space,
	up to ~500 bytes, but they are still compressed, so there is less waste

	V1 (little overhead) is the default
	V2 (up to zpaqfranz 56) is for extended hashes, like sha-3
	V3 (from zpaqfranz 57+) is variable-sized in two
	"flavours": 190 bytes and 550.
	The shorter allows very long hashes (e.g. whirlpool),
	the bigger also contains a TAR-like format for storing
	symlinks (and other information)
*/

// Per-file size, in bytes, of the extra "FRANZOFFSET" package for each archive format version.
// V1: little overhead, the default
#define FRANZOFFSETV1 		50
// V2: extended hashes (up to zpaqfranz 56)
#define FRANZOFFSETV2 		76
// V3: the larger of the two variable-sized flavours (from zpaqfranz 57+)
#define FRANZOFFSETV3 		550 // 190 + 360 posix
// NOTE(review): presumably the maximum path length handled together with the packages above - confirm against its users
#define FRANZMAXPATH 		240

#define STRINGIFY(x) 			#x
#define TOSTRING(x)  			STRINGIFY(x)
/*
	FRANZO_SOMETHING can be used inside zpaq archive
	ALGO_SOMETHING   cannot (just for the sum command)
*/
#define FRANZO_NONE				0
#define FRANZO_CRC_32 			1
#define	FRANZO_XXHASH64			2 
#define FRANZO_SHA_1			3 
#define FRANZO_SHA_256			4 
#define	FRANZO_XXH3				5 
#define	FRANZO_BLAKE3			6 
#define FRANZO_SHA3				7 
#define FRANZO_MD5				8 
#define FRANZO_WINHASH64		9
#define FRANZO_WHIRLPOOL		10
#define FRANZO_HIGHWAY64		11
#define FRANZO_HIGHWAY128		12
#define FRANZO_HIGHWAY256		13

#define FRANZO_XXHASH64B		14
#define FRANZO_MD5B				15
#define FRANZO_BLAKE3B			16
#define FRANZO_SHA_256B			17
#define FRANZO_SHA3B			18
#define FRANZO_XXH3B			19
#define FRANZO_SHA_1B			20

#define ALGO_CRC32C			100
#define ALGO_WYHASH			101
#define ALGO_NILSIMSA		103
#define ALGO_ENTROPY		104
#define ALGO_QUICK			105
#define ALGO_ZETA			106
#define ALGO_ZETAENC		107


#define _FILE_OFFSET_BITS 64  // In Linux make sizeof(off_t) == 8. Define BEFORE including windows.h!!!
#ifndef UNICODE
	#define UNICODE  // For Windows
#endif
#ifndef DEBUG
	#define NDEBUG 1
#endif
#if defined(SOLARIS) || defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
	#ifndef unix
		#define unix 1
	#endif
#endif

#ifdef IPV6
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netdb.h>
#endif

/// On Windows you need to link -lwsock32 -lws2_32
#ifdef SERVER
	#include <winsock2.h>
	#include <ws2tcpip.h>
#endif

#ifdef UNIX
	#define unix 1
#endif

#ifdef unix
	#ifndef ANCIENT
		#include <array>
		#include <signal.h> /// alpine
	#endif
	#if defined(SOLARIS)
		#include <kstat.h>
	#endif
	#if defined(__linux__) || defined(SOLARIS)
		#include <sys/statvfs.h>
	#endif
	#if defined(__linux__)
		#include <mntent.h>
	#endif
	#include <algorithm>
	#include <assert.h>
	#include <cstddef>
	#include <cstdio>
	#include <dirent.h>
	#include <fcntl.h>
	#include <iostream>
	#include <map>
	#include <math.h>
	#include <memory>
	#include <pthread.h>
	#include <stdarg.h>
	#include <stdexcept>
	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <string>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
#ifndef __HAIKU__
	#include <sys/mount.h>
#endif
#ifdef __HAIKU__
	#include <OS.h>
#endif
	#include <sys/param.h>
	#include <sys/stat.h>
	#include <sys/time.h>
	#include <sys/times.h>
	#include <sys/types.h>
	#include <sys/utsname.h>
	#include <termios.h>
	#include <time.h>
	#include <unistd.h>
	#include <utime.h>
	#include <vector>
	#ifdef BSD
		#include <sys/sysctl.h>
		#include <sys/mount.h>
	#endif
	
	#include <netinet/in.h>
	#include <sys/socket.h>
	#include <netdb.h>

#else  // Assume Windows
	#include <assert.h>
	#include <time.h>
	#include <pthread.h>
	#include <math.h>
	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>
	#include <algorithm>
	#include <string>
	#include <vector>
	#include <map>
	#include <stdexcept>
	#include <cstddef>
	#include <conio.h>
	#include <windows.h>
	#include <io.h>
	#include <sys/stat.h>
	#include <fcntl.h> // for setmode()
	using namespace std;
#endif


bool	g_control_c				=false;
int 	g_incomplete_version	=0;
    
int64_t	g_starting_zpaqdate		=0;
int64_t g_starting_zpaqsize		=0;
int64_t g_starting_indexsize	=0;
int64_t g_starting_zpaqattr		=0;	

std::string g_thememfileblock	="";
std::string g_thememfileblock_h	="";

int64_t g_thememfilestart		=0;
int64_t g_thememfilestart_h		=0;
int64_t g_thememfilelength		=0;

int g_thememfilefragstart		=0;
int g_thememfilefragend			=0;

#ifdef SERVER
string	g_server		="127.0.0.1";
string	g_port			="4567";
SOCKET 	g_socket		=0;
int64_t g_socket_sended	=0;
int64_t g_socket_packet	=0;
int64_t g_socket_packets=0;
string 	g_cloudkey		="";
int64_t	g_enter			=0;
#endif
FILE* 	g_output_handle;
FILE* 	g_error_handle;
bool flagignore;
bool flagnotrim;
bool flaghw;	// this slow down vs HWSHA1
bool flagdebug;
bool flagdebug2;
bool flagdebug3;
bool flagdebug4;
bool flagdebug5;
std::string	g_processorname="";
bool flagnojit;

char* 	g_password;     				// points to password_string or NULL
char 	g_password_string[32]; 			// hash of -key argument
	
bool	g_flagcreating;
char 	command;
std::string	g_franzsnap;
std::string 	g_vss_shadow;
std::string  g_replaceme;
std::string 	g_copy;
std::string 	g_freeze;
std::string 	g_exec_error;
std::string  g_exec_warn;
std::string 	g_exec_ok;
std::string 	g_exec_text;
std::string 	g_exec;
int 	g_255;
std::string 	g_output;
std::string 	g_error;
std::string  g_ifexist;
///string  g_ismounted;
std::string 	g_script;
std::string 	g_sfx;
std::string 	g_sfxto;
std::string 	g_sfxnot;
std::string 	g_sfxonly;
std::string 	g_sfxuntil;
std::string 	g_deleteinto;
bool 	g_sfxflagall;
bool  	g_sfxflagforce;
bool	g_sse42;
bool	g_flagmultipart;
std::string 	g_archive;
std::string	g_indexname;
std::string	g_externalname;
std::string	g_input;
std::string	g_destination;
std::string	g_csvstring;
std::string	g_csvhf;
std::string	g_pidname;
bool		flagnopid;
FILE* 	g_pid_handle=0;
std::string	g_backupposition;
int64_t g_robocopy_check_sorgente;
int64_t g_robocopy_check_destinazione;
int64_t g_robocopy_makepath;
int64_t g_robocopy_makepath2;
int64_t g_robocopy_isequal;
int64_t g_robocopy_close;
int64_t g_robocopy_close2;
int64_t g_robocopy_touch;
int64_t g_robocopy_delete;
int64_t	g_robocopy_readopen;
int64_t	g_robocopy_openoutfile;
int64_t g_robocopy_fclose;
int64_t g_robocopy_fread;
int64_t g_robocopy_fwrite;

bool	g_testifselected;
uint64_t g_chunk_size	=0;
int64_t g_start			=0;
int64_t g_dimensione	=0;
int64_t g_scritti		=0;
int64_t g_maxposition	=0;
int64_t g_zerotime		=0;
int64_t g_bytescanned	=0;
int64_t g_filescanned	=0;
int64_t g_worked		=0;
int64_t g_fwritten		=0;
int64_t g_fexpected		=0;
int64_t g_fwrittencrc32	=0;

int64_t 	g_header_pos	=0;
bool		g_crc_getheader	=false; ///0 =  header; 1= jidac; 2=body
bool		g_veryfirst		=true;
uint32_t 	g_crc_header	=0;
uint32_t 	g_crc_jidac		=0;
uint32_t 	g_crc_body		=0;


int64_t g_ramdisksize	=0;
int64_t g_rd			=0;
int64_t g_rd_expected	=0;
int64_t g_startrd		=0;
int64_t g_startdownload	=0;
int 	g_rd_ultimotempo=0;
int64_t g_cdatasize		=0;
unsigned g_htsize		=0;
bool	g_fakewrite		=false;	// in add() disable write (ransomware)
uint64_t minsize;
uint64_t maxsize;
int64_t g_touch			=0;
int64_t	g_datefrom		=0;
int64_t g_dateto		=0;
std::string	g_until;
int		g_rangefrom		=0;
int		g_rangeto		=0;
int		g_rangelast		=0;
///int		red4			=-1;
#ifdef unix
std::string g_tempsnapshot;
std::string g_basesnapshot;
std::string g_dataset;
#endif
bool flagnosymlink;
bool flagnochecksum;
bool flagcrc32;
bool flagxxhash64;
bool flagxxhash64b;
bool flagsha1;
bool flagsha256;
bool flagsha256b;
bool flagxxh3;
bool flagmd5;
bool flagmd5b;
bool flagsha1b;
bool flagxxh3b;
bool flagblake3;
bool flagblake3b;
bool flagsha3;
bool flagsha3b;
bool flagwhirlpool;
bool flaghighway64;
bool flaghighway128;
bool flaghighway256;
bool flagcrc32c;
bool flagwyhash;
bool flagnilsimsa;
bool flagentropy;
bool flagwindate;

bool flagtmp;
bool flagbackupxxh3;
bool flagbackupzeta;
bool flag715;
bool flagappend;
bool flagbig;
bool flagchecksum;
bool flagchecktxt;
bool flagsfx;
bool flagfasttxt;
bool flagcomment;
bool flagdesc;
bool flagdonotforcexls;
bool flagfilelist;
bool flagexternal;
bool flaginput;
bool flagdestination;
bool flagads;
bool flagfast;
bool flagfix255;
bool flagfixeml;
bool flagflat;
bool flagforce;
bool flagforcewindows;
bool flagforcezfs;
bool flagfrugal;
bool flaghashdeep;
bool flagkill;
bool flaght;
bool flagnocaptcha;
bool flagmm;
bool flagattr;
bool flagthunderbird;
bool flagnoattributes;
bool flagnodedup;
bool flagnoeta;
bool flagnopath;
bool flagnoqnap;
bool flagnomac;
bool flagnosynology;
bool flagnorecursion;
bool flagnosort;
bool flagpakka;
bool flagdistinct;
bool flagparanoid;
bool flagpaq;
bool flagcollision;
bool flagramdisk;
bool flagrename;
bool flagsilent;
bool flagnoconsole;
bool flagnocolor;
bool flagnodelete;
bool flagskipzfs;
bool flagspace;
bool flagssd;
bool flagnomore;
bool flagsalt;
bool flaghdd;
bool flagquick;
bool flagzeta;
bool flagzetaenc;
bool flagquiet;
bool flagstat;
bool flagstdin;
bool flagterse;
bool flagnodel;
bool flagcsv;
bool flagstdout;
bool flagstore;
bool flagtar;
bool flagtest;
bool flagtouch;
bool flagutc;
bool flagdate;
bool flagutf;
bool flagpause;
bool flagverbose;
bool flagverify;
bool flagvss;
bool flagzero;
bool flaghome;
bool flagfixcase;


#ifdef _WIN32
#define MYFOREGROUND_BLUE            0x0001
#define MYFOREGROUND_GREEN           0x0002
#define MYFOREGROUND_RED             0x0004
#define MYFOREGROUND_INTENSITY       0x0008
#define MYBACKGROUND_BLUE            0x0010
#define MYBACKGROUND_GREEN           0x0020
#define MYBACKGROUND_RED             0x0040
#define MYBACKGROUND_INTENSITY       0x0080
#else
/// *nix colors
#define TEXT_BLACK   	"\x1b[30m"
#define TEXT_RED     	"\x1b[31m"
#define TEXT_GREEN   	"\x1b[32m"
#define TEXT_YELLOW  	"\x1b[33m"
#define TEXT_BLUE    	"\x1b[34m"
#define TEXT_MAGENTA	"\x1b[35m"
#define TEXT_CYAN		"\x1b[36m"
#define TEXT_WHITE		"\x1b[37m"
#define TEXT_RESET		"\x1b[0m"

///#define TEXT_BLACK_BRIGHT   "\x1b[1;30m"

#define TEXT_BLACK_BRIGHT   	"\x1b[1;30m"
#define TEXT_RED_BRIGHT     	"\x1b[1;31m"
#define TEXT_GREEN_BRIGHT   	"\x1b[1;32m"
#define TEXT_YELLOW_BRIGHT  	"\x1b[1;33m"
#define TEXT_BLUE_BRIGHT    	"\x1b[1;34m"
#define TEXT_MAGENTA_BRIGHT		"\x1b[1;35m"
#define TEXT_CYAN_BRIGHT		"\x1b[1;36m"
#define TEXT_WHITE_BRIGHT		"\x1b[1;37m"
#define TEXT_RESET_BRIGHT		"\x1b[1;0m"

#define BG_BLACK   		"\x1b[40m"
#define BG_RED     		"\x1b[41m"
#define BG_GREEN   		"\x1b[42m"
#define BG_YELLOW  		"\x1b[43m"
#define BG_BLUE    		"\x1b[44m"
#define BG_MAGENTA 		"\x1b[45m"
#define BG_CYAN    		"\x1b[46m"
#define BG_WHITE   		"\x1b[47m"

#define BG_BLACK_BRIGHT			"\x1b[1;40m"
#define BG_RED_BRIGHT     		"\x1b[1;41m"
#define BG_GREEN_BRIGHT   		"\x1b[1;42m"
#define BG_YELLOW_BRIGHT  		"\x1b[1;43m"
#define BG_BLUE_BRIGHT    		"\x1b[1;44m"
#define BG_MAGENTA_BRIGHT 		"\x1b[1;45m"
#define BG_CYAN_BRIGHT    		"\x1b[1;46m"
#define BG_WHITE_BRIGHT   		"\x1b[1;47m"
#endif


int g_console_attributes=-1;

/*
	Report whether the running OS identifies itself as version 5.1
	(Windows XP). Always false on non-Windows builds, and false when the
	version query itself fails.
*/
bool iswindowsxp()
{
#ifdef _WIN32
    OSVERSIONINFO osvi;
    memset(&osvi,0,sizeof(osvi));
    osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
    // On failure the structure is not filled in: do not trust its content
    if (!GetVersionEx(&osvi))
        return false;

    return (osvi.dwMajorVersion == 5)  && (osvi.dwMinorVersion == 1);
#else
	return false;
#endif
}

#ifdef _WIN32
void color_save()
{
	if (flagnocolor)
		return;
	HANDLE hconsole=GetStdHandle(STD_OUTPUT_HANDLE);
	if (hconsole==INVALID_HANDLE_VALUE)
		return;
	CONSOLE_SCREEN_BUFFER_INFO csbiInfo;
	GetConsoleScreenBufferInfo(hconsole,&csbiInfo);
	if ((csbiInfo.wAttributes & MYFOREGROUND_BLUE) &&
	 (csbiInfo.wAttributes & MYFOREGROUND_GREEN) &&
	 (csbiInfo.wAttributes & MYFOREGROUND_RED))
		g_console_attributes=csbiInfo.wAttributes;
}
#endif
/*
	Go back to the default text color, unless colors are disabled.
	Windows: re-apply the attributes stored in g_console_attributes
	(nothing is done when none were stored).
	Elsewhere: emit the ANSI reset sequence on stdout.
*/
void color_restore()
{
	if (!flagnocolor)
	{
#ifdef _WIN32
		if (g_console_attributes>=0)
		{
			HANDLE hconsole=GetStdHandle(STD_OUTPUT_HANDLE);
			if (hconsole!=INVALID_HANDLE_VALUE)
				SetConsoleTextAttribute(hconsole,g_console_attributes);
		}
#else
		printf(TEXT_RESET);
#endif
	}
}
#ifdef _WIN32
/*
	Windows only: set the console text attribute to i_color.
	Does nothing when colors are disabled, when no original attributes
	were saved (g_console_attributes<0) or when stdout has no valid handle.
*/
void color_something(int i_color)
{
	if (flagnocolor || (g_console_attributes<0))
		return;
	HANDLE hconsole=GetStdHandle(STD_OUTPUT_HANDLE);
	if (hconsole!=INVALID_HANDLE_VALUE)
		SetConsoleTextAttribute(hconsole,i_color);
}
#endif
/// Switch the text color to red (bright red on ANSI terminals), unless colors are disabled
void color_red()
{
	if (!flagnocolor)
	{
#ifdef _WIN32
		color_something(MYFOREGROUND_RED);
#else
		printf(TEXT_RED_BRIGHT);
#endif
	}
}
/// Switch the text color to bright green, unless colors are disabled
void color_green()
{
	if (!flagnocolor)
	{
#ifdef _WIN32
		color_something(MYFOREGROUND_GREEN|MYFOREGROUND_INTENSITY);
#else
		printf(TEXT_GREEN_BRIGHT);
#endif
	}
}
/// Switch the text color to bright yellow (red+green+intensity), unless colors are disabled
void color_yellow()
{
	if (!flagnocolor)
	{
#ifdef _WIN32
		color_something(MYFOREGROUND_RED|MYFOREGROUND_GREEN|MYFOREGROUND_INTENSITY);
#else
		printf(TEXT_YELLOW_BRIGHT);
#endif
	}
}
/*
	Black text on a bright green background, unless colors are disabled.
	On macOS and Haiku builds this falls back to plain green text.
*/
void color_blackongreen()
{
	if (!flagnocolor)
	{
#ifdef _WIN32
		color_something(MYBACKGROUND_GREEN|MYBACKGROUND_INTENSITY);
#elif (defined(__APPLE__) && defined(__MACH__)) || defined(__HAIKU__)
		color_green();
#else
		printf("\x1b[30m" "\x1b[102m");
#endif
	}
}


static const int64_t LIST_HT_BAD=   -0x7FFFFFFFFFFFFFFALL;  // no such frag


std::string print_datetime(bool i_flagout=true);

#ifdef _WIN32
// In Windows, convert a UTF-8 string to a wide (UTF-16) string.
//   ss    : NUL-terminated UTF-8 input; NULL gives an empty result
//   slash : character that replaces every '/' of the input (default '\')
// Only 1, 2 and 3 byte sequences are decoded. Any byte that matches none of
// the branches below (stray continuation byte, truncated sequence, lead byte
// of a 4-byte sequence, i.e. code points above 64K) is skipped one byte at a
// time and adds nothing to the result.
std::wstring utow(const char* ss, char slash='\\') {
  // Sanity checks on the platform wchar_t (active only when NDEBUG is not defined)
  assert(sizeof(wchar_t)==2);
  assert((wchar_t)(-1)==65535);
  std::wstring r;
  if (!ss) return r;
  // Work on unsigned bytes so that the range comparisons below are meaningful
  const unsigned char* s=(const unsigned char*)ss;
  for (; s && *s; ++s) {
    if (s[0]=='/') r+=slash;
    // plain ASCII
    else if (s[0]<128) r+=s[0];
    // 2-byte sequence: lead byte 192..223 plus one continuation byte; consumes one extra byte
    else if (s[0]>=192 && s[0]<224 && s[1]>=128 && s[1]<192)
      r+=(s[0]-192)*64+s[1]-128, ++s;
    // 3-byte sequence: lead byte 224..239 plus two continuation bytes; consumes two extra bytes
    else if (s[0]>=224 && s[0]<240 && s[1]>=128 && s[1]<192
             && s[2]>=128 && s[2]<192)
      r+=(s[0]-224)*4096+(s[1]-128)*64+s[2]-128, s+=2;
  }
  return r;
}
#endif

void printUTF8(const char* s, FILE* f=stdout)
{
	assert(f);
	assert(s);
	if (g_output_handle!=0)
		fprintf(g_output_handle,"%s",s);
	if (flagsilent)
		return;
#ifdef unix
  fprintf(f, "%s", s);
#else
  const HANDLE h=(HANDLE)_get_osfhandle(_fileno(f));
  DWORD ft=GetFileType(h);
  if (ft==FILE_TYPE_CHAR) {
    fflush(f);
    std::wstring w=utow(s, '/');
    DWORD n=0;
    WriteConsole(h, w.c_str(), w.size(), &n, 0);
  }
  else
    fprintf(f, "%s", s);
#endif
}



/*
	Look for a message prefix of the form "NNNNNc " (five digits, one
	marker character, one space) at the start of i_buffer, in a string
	longer than 7 characters.
	The marker selects which output flag is raised:
		'!'  -> o_flagerror
		'$'  -> o_flagwarning
		':'  -> o_flagcolon
	All three flags are reset to false first, so on return at most one of
	them is true. A NULL i_buffer is reported and terminates the program.
*/
void	decode_print_flag(const char* i_buffer,bool& o_flagcolon,bool& o_flagerror,bool& o_flagwarning)
{
	if (i_buffer==NULL)
	{
		printf("02734: i_buffer calcola flag NULL!\n");
		exit(0);
	}
	o_flagcolon		=false;
	o_flagerror		=false;
	o_flagwarning	=false;

	// The prefix itself is 7 characters: require something after it
	if (strlen(i_buffer)<=7)
		return;

	// isdigit() is only defined for unsigned char values (or EOF)
	for (int i=0;i<5;i++)
		if (!isdigit((unsigned char)i_buffer[i]))
			return;

	if (i_buffer[6]!=' ')
		return;

	const char marker=i_buffer[5];
	o_flagerror		=(marker=='!');
	o_flagwarning	=(marker=='$');
	o_flagcolon		=(marker==':');
}

/*
	Copy the printf-style format string input into output, turning every
	%Z conversion (optionally with flags, width and precision, e.g. %-20Z)
	into the matching %s conversion. An escaped percent ("%%") is copied
	untouched, so "%%Z" stays a literal "%Z".
	The result is never longer than the input: output must have room for
	strlen(input)+1 characters.
	A NULL input yields an empty string, a NULL output is ignored.
*/
void replacezwiths(const char *input, char *output) 
{
    if (output == NULL)
        return;
    if (input == NULL)
    {
        *output = '\0';
        return;
    }

    const char *src = input;
    char *dst = output;

    while (*src != '\0')
    {
        // Ordinary character: plain copy
        if (*src != '%')
        {
            *dst++ = *src++;
            continue;
        }
        // "%%" is a literal percent: copy both, do not look at what follows
        if (src[1] == '%')
        {
            *dst++ = *src++;
            *dst++ = *src++;
            continue;
        }
        // Start of a conversion: copy '%' then flags, width and precision
        *dst++ = *src++;
        while (*src != '\0' && strchr("-+0 ", *src) != NULL)
            *dst++ = *src++;
        while (*src >= '0' && *src <= '9')
            *dst++ = *src++;
        if (*src == '.')
        {
            *dst++ = *src++;
            while (*src >= '0' && *src <= '9')
                *dst++ = *src++;
        }
        // Only the Z conversion is rewritten; anything else is copied by the next iterations
        if (*src == 'Z')
        {
            *dst++ = 's';
            src++;
        }
    }
    *dst = '\0';
}

/// Position (0-based) of the first occurrence of i_substring inside i_string, -1 when absent
int mypos(const std::string& i_substring,const std::string& i_string)
{
	const size_t found=i_string.find(i_substring);
	return (found==std::string::npos) ? -1 : (int)found;
}

/*
	Tell whether i_filename exists.
	unix: true whenever stat() succeeds (directories included).
	Windows: with flagads on and a ':' in the name the object is opened
	directly with CreateFileW (alternate data stream); otherwise the name
	is looked up with FindFirstFile.
	Other platforms: always false.
*/
bool fileexists(const std::string& i_filename)
{
#if defined(unix)
	// succeeds for folders too: no S_ISDIR test on purpose
	struct stat info;
	if (stat(i_filename.c_str(),&info)!=0)
		return false;
	return true;
#elif defined(_WIN32)
	if (flagads && (mypos(":",i_filename)!=-1))
	{
		if (flagdebug3)
			printf("00109: flagads ON and : in i_filename\n");
		HANDLE hFile = CreateFileW((utow(i_filename.c_str()).c_str()), GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
		if (hFile==INVALID_HANDLE_VALUE)
			return false;
		if (flagdebug3)
			printf("00110: ADS FOUNDED!\n");
		CloseHandle(hFile);
		return true;
	}

	WIN32_FIND_DATA findfiledata;
	const std::wstring wpattern=utow(i_filename.c_str());
	HANDLE myhandle=FindFirstFile(wpattern.c_str(),&findfiledata);
	if (myhandle==INVALID_HANDLE_VALUE)
		return false;
	FindClose(myhandle);
	return true;
#else
	return false;
#endif
}

// Delete a file, return true if successful.
// unix: plain remove(). Elsewhere (Windows): a file that does not exist
// counts as success; the attributes are reset to NORMAL before deleting.
bool delete_file(const char* filename) {
#ifdef unix
	return remove(filename)==0;
#else
	if (fileexists(filename))
	{
		const std::wstring wname=utow(filename);
		SetFileAttributes(wname.c_str(),FILE_ATTRIBUTE_NORMAL);
		return DeleteFile(wname.c_str());
	}
	return true;
#endif
}

/*
	Make sure the error log is ready for writing.
	Returns false when no error file name is set (g_error empty) or when
	the file cannot be opened; the file is opened (truncating, binary) the
	first time it is needed and the handle kept in g_error_handle.
*/
bool prepare_error_log()
{
	if (g_error.empty())
		return false;

	if (g_error_handle==0)
	{
		if (flagverbose)
			printf("02747: OPENING ERROR FILE %s\n",g_error.c_str());
		g_error_handle=fopen(g_error.c_str(),"wb");
	}

	return (g_error_handle!=0);
}
/// Append the string i_input, as is, to the error log (if available) and flush it
void my_print_on_error_z(const char* i_input)
{
	if ((i_input!=NULL) && prepare_error_log())
	{
		fprintf(g_error_handle,"%s",i_input);
		fflush(g_error_handle);
	}
}
/// Write the string i_input to the error log (if available) using the printf format i_buffer, then flush
void my_print_on_error_s(const char* i_buffer,const char* i_input)
{
	if ((i_buffer==NULL) || (i_input==NULL))
		return;
	if (!prepare_error_log())
		return;
	fprintf(g_error_handle,i_buffer,i_input);
	fflush(g_error_handle);
}
/// Write the int i_input to the error log (if available) using the printf format i_buffer, then flush
void my_print_on_error_i(char* i_buffer,int i_input)
{
	if ((i_buffer!=NULL) && prepare_error_log())
	{
		fprintf(g_error_handle,i_buffer,i_input);
		fflush(g_error_handle);
	}
}
/// Write the unsigned int i_input to the error log (if available) using the printf format i_buffer, then flush
void my_print_on_error_u(char* i_buffer,unsigned int i_input)
{
	if ((i_buffer!=NULL) && prepare_error_log())
	{
		fprintf(g_error_handle,i_buffer,i_input);
		fflush(g_error_handle);
	}
}
/// Write the double i_input to the error log (if available) using the printf format i_buffer, then flush
void my_print_on_error_d(const char* i_buffer,double i_input)
{
	if ((i_buffer!=NULL) && prepare_error_log())
	{
		fprintf(g_error_handle,i_buffer,i_input);
		fflush(g_error_handle);
	}
}

/*
	Improved printf used for the program messages.

	The format may start with a "NNNNNc " prefix (see decode_print_flag):
		"NNNNN! "  error   : printed in red, prefix kept, text also
		                     mirrored to the error log
		"NNNNN$ "  warning : printed in yellow, prefix removed
		"NNNNN: "  message : prefix removed
	For warnings and messages the five digits are shown (in green) only
	when flagdebug is on.

	Supported conversions: d i u x X o f s c, with optional flags
	(-+0 and space), width and precision, plus the special %Z that prints
	a UTF-8 string through printUTF8(). "%%" prints a single '%'.
	Anything else after a '%' is not printed and consumes no argument.

	Everything written on the console is mirrored to g_output_handle when
	that is open. With flagsilent nothing goes to the console: the message
	is formatted in one go (at most 4095 characters, prefix kept) and only
	sent to the output/error logs.
*/
void myprintf(const char *format, ...) 
{
	bool	flagerror	=false;
	bool	flagcolon	=false;
	bool	flagwarning	=false;
	char	buffer[4096]; // whole message in silent mode, a single conversion spec otherwise
	
	
	if (flagsilent)
	{
		// Nothing can be formatted from a NULL format
		if (format==NULL)
			return;

		// The %Z -> %s translation never lengthens the text: strlen+1 always fits,
		// whatever the length of the format
		std::vector<char> fixata(strlen(format)+1);
		replacezwiths(format,&fixata[0]);

		va_list args2;
		va_start(args2,format);
		vsnprintf(buffer,sizeof(buffer),&fixata[0],args2);
		va_end(args2);
		
		decode_print_flag(buffer,flagcolon,flagerror,flagwarning);
		if (g_output_handle!=0)
			fprintf(g_output_handle,"%s",buffer);
	
		if (flagerror)
			my_print_on_error_z(buffer);

		return;
	}


	va_list args;
	va_start(args, format);

	// NOTE: decode_print_flag() ends the program when format is NULL
	decode_print_flag(format,flagcolon,flagerror,flagwarning);
	
	if (flagcolon || flagwarning)
	{
		if (flagdebug)
		{
			color_green();
			printf("%c%c%c%c%c: ",format[0],format[1],format[2],format[3],format[4]);
			color_restore();
		}
		// skip the "NNNNNc " prefix
		format+=7;
	}
		
	if (flagerror)
		color_red();
	else
	{
		if (flagwarning)
			color_yellow();
	}

	// Last position usable for flags/width: keeps room for the conversion char and the NUL
	char* const	speclimit=buffer+sizeof(buffer)-2;

	const char *p = format;
	while (*p) 
	{
		if (*p == '%') 
		{
			p++;  // move to the character after '%'

			// '%%' prints a single '%', mirrored like any ordinary character
			if (*p == '%') 
			{
				putchar('%');
				if (g_output_handle!=0)
					fputc('%',g_output_handle);
				if (flagerror)
				{
					if (prepare_error_log())
						fputc('%',g_error_handle);
				}
			} 
			else 
			{
				// Rebuild the single conversion spec inside buffer
				char *b = buffer;
				*b++ = '%';

				// flags such as '-' or '0' (alignment, padding)
				while (*p && (strchr("-+0 ", *p) != NULL) && (b<speclimit)) 
					*b++ = *p++;

				// field width (e.g. 10 in %10s)
				while ((*p >= '0' && *p <= '9') && (b<speclimit)) 
					*b++ = *p++;

				// precision (e.g. .2 in %5.2f)
				if ((*p == '.') && (b<speclimit))
				{
					*b++ = *p++;
					while ((*p >= '0' && *p <= '9') && (b<speclimit)) 
						*b++ = *p++;
				}

				// Format ended in the middle of a conversion: stop here,
				// otherwise the p++ below would step past the terminator
				if (*p == '\0')
					break;

				// conversion character (e.g. 'd', 's', 'f')
				if (strchr("diouxXfscZ", *p) != NULL) 
				{
					*b++ = *p;
					*b = '\0';
				}

				// Fetch the matching argument and print it everywhere it is due
				if (*p == 'd' || *p == 'i') 
				{
					int i = va_arg(args, int);
					printf(buffer, i);
					if (g_output_handle!=0)
						fprintf(g_output_handle,buffer,i);
					if (flagerror)
						my_print_on_error_i(buffer,i);
				} 
				else if (*p == 'u') 
				{
					unsigned int u = va_arg(args, unsigned int);
					printf(buffer, u);
					if (g_output_handle!=0)
						fprintf(g_output_handle,buffer,u);
					if (flagerror)
						my_print_on_error_u(buffer,u);
				} 
				else if (*p == 'x' || *p == 'X' || *p == 'o') 
				{
					// 'o' is accepted above: it must consume its argument too
					unsigned int x = va_arg(args, unsigned int);
					printf(buffer, x);
					if (g_output_handle!=0)
						fprintf(g_output_handle,buffer,x);
					if (flagerror)
						my_print_on_error_u(buffer,x);
				}
				else if (*p == 'f') 
				{
					double f = va_arg(args, double);
					printf(buffer, f);
					if (g_output_handle!=0)
						fprintf(g_output_handle,buffer,f);
					if (flagerror)
						my_print_on_error_d(buffer,f);
				}
				else if (*p == 's') 
				{
					char *s = va_arg(args, char *);
					printf(buffer, s);
					if (g_output_handle!=0)
						fprintf(g_output_handle,buffer,s);
					if (flagerror)
						my_print_on_error_s(buffer,s);
				}
				else if (*p == 'c') 
				{
					int c = va_arg(args, int);
					printf(buffer, c);
					if (g_output_handle!=0)
						fprintf(g_output_handle,buffer,c);
					if (flagerror)
						my_print_on_error_i(buffer,c);
				}
				else if (*p == 'Z') 
				{
					// special %Z token: UTF-8 aware output
					// (printUTF8 takes care of g_output_handle by itself)
					char *s = va_arg(args, char *);
					printUTF8(s);
					if (flagerror)
						my_print_on_error_z(s);
				}
			}
		} 
		else 
		{
			// ordinary character (not a format token)
			putchar(*p);
			if (g_output_handle!=0)
				fputc(*p,g_output_handle);
			if (flagerror)
			{
				if (prepare_error_log())
					fputc(*p,g_error_handle);
			}
		}

		p++;  // next character of the format
	}
	va_end(args);
#ifndef _WIN32
	fflush(stdout);
#endif
	if (flagerror || flagwarning)
		color_restore();
}

/// LICENSE_START.23

//  This is a reworked https://github.com/codewithnick/ascii-art
//  Just one font, different output char, no streams

// Base class for ASCII-art fonts.
// A derived font overrides the per-character virtual methods (a()..z(),
// A()..Z(), zero()..nine(), space()) to return a glyph grid obtained from
// getCharGrid(); the default implementations return 0 ("no glyph").
// pushChar() copies a glyph onto an internal canvas, printvector() prints
// the canvas through myprintf().
class Fonts
{
    typedef std::vector<char> Row;
    enum { INITIAL_WIDTH = 100 };   // starting canvas width, in columns

    unsigned int def_rows;   // default glyph height (constructor argument)
    unsigned int def_cols;   // default glyph width  (constructor argument)
    unsigned int char_rows;  // glyph height selected by the latest getCharGrid()
    unsigned int char_cols;  // glyph width  selected by the latest getCharGrid()
    unsigned int curr_col;   // canvas column where the next glyph will be placed
    std::vector<Row> letters; // the canvas, one Row per text line
    // GCC prints on this class: "note: the layout of aggregates containing
    // vectors with 8-byte alignment has changed in GCC 5".

protected:
    // Allocates (with new[]) a rows x cols grid filled with ' ' and records
    // the size in char_rows/char_cols. A 0 argument selects the default size.
    // The grid is not released anywhere in this class.
    char **getCharGrid(unsigned int rows = 0, unsigned int cols = 0)
    {
        this->char_rows = rows ? rows : def_rows;
        this->char_cols = cols ? cols : def_cols;

        char **char_grid = new char *[char_rows];
        for (unsigned int row = 0; row < char_rows; row++)
        {
            char_grid[row] = new char[char_cols];
            for (unsigned int col = 0; col < char_cols; col++)
                char_grid[row][col] = ' ';
        }
        return char_grid;
    }

    // Drops the whole canvas.
    void destroyspace()
    {
        letters.clear();
    }

public:
    // Creates an empty canvas of def_rows lines, INITIAL_WIDTH blanks each.
    Fonts(int def_rows, int def_cols)
    {
        this->def_rows = def_rows;
        this->def_cols = def_cols;
        // Fix: give the "current glyph size" a defined value, so pushChar()
        // never reads indeterminate members before the first getCharGrid().
        this->char_rows = this->def_rows;
        this->char_cols = this->def_cols;

        letters.reserve(def_rows);
        for (int row = 0; row < def_rows; row++)
            letters.push_back(Row(Row::size_type(INITIAL_WIDTH), ' '));
        curr_col = 0;
    }

    // Copies a char_rows x char_cols glyph onto the canvas at curr_col, then
    // advances curr_col by the glyph width plus a 2-column gap.
    // A null glyph is ignored. The glyph itself is not freed here.
    void pushChar(char **character)
    {
        if (!character)
            return;

        // Fix: the canvas used to be a fixed 100 columns, so the write below
        // ran past the end of each row once enough glyphs had been pushed.
        // Grow every row (uniformly) so the glyph always fits.
        Row::size_type width = letters.empty() ? Row::size_type(INITIAL_WIDTH) : letters[0].size();
        const Row::size_type needed = Row::size_type(curr_col) + char_cols;
        if (width < needed)
            width = needed;

        while (letters.size() < char_rows)
            letters.push_back(Row(width, ' '));
        for (std::vector<Row>::size_type idx = 0; idx < letters.size(); idx++)
            if (letters[idx].size() < width)
                letters[idx].resize(width, ' ');

        for (unsigned int row = 0; row < char_rows; row++)
            for (unsigned int col = 0; col < char_cols; col++)
                letters[row][col + curr_col] = character[row][col];

        curr_col += (char_cols + 2);
    }

    // Prints every canvas line without its trailing blanks, each followed by
    // a newline.
    void printvector()
    {
        for (std::vector<Row>::size_type idx = 0; idx < letters.size(); idx++)
        {
            const Row &line = letters[idx];

            // Fix: 'end' is one PAST the last non-blank cell. The previous
            // code stopped AT that cell and therefore never printed it; it
            // also ignored column 0 and printed all-blank lines at full width.
            Row::size_type end = line.size();
            while (end > 0 && line[end - 1] == ' ')
                end--;

            for (Row::size_type col = 0; col < end; col++)
                myprintf("%c", line[col]);
            myprintf("\n");
        }
    }

    /******************************** glyph hooks ********************************/
    // Each hook returns the glyph for one character, or 0 when the font does
    // not provide it.

    // Space
    virtual char **space() { return 0; }

    // Lowercase letters
    virtual char **a() { return 0; }
    virtual char **b() { return 0; }
    virtual char **c() { return 0; }
    virtual char **d() { return 0; }
    virtual char **e() { return 0; }
    virtual char **f() { return 0; }
    virtual char **g() { return 0; }
    virtual char **h() { return 0; }
    virtual char **i() { return 0; }
    virtual char **j() { return 0; }
    virtual char **k() { return 0; }
    virtual char **l() { return 0; }
    virtual char **m() { return 0; }
    virtual char **n() { return 0; }
    virtual char **o() { return 0; }
    virtual char **p() { return 0; }
    virtual char **q() { return 0; }
    virtual char **r() { return 0; }
    virtual char **s() { return 0; }
    virtual char **t() { return 0; }
    virtual char **u() { return 0; }
    virtual char **v() { return 0; }
    virtual char **w() { return 0; }
    virtual char **x() { return 0; }
    virtual char **y() { return 0; }
    virtual char **z() { return 0; }

    // Uppercase letters
    virtual char **A() { return 0; }
    virtual char **B() { return 0; }
    virtual char **C() { return 0; }
    virtual char **D() { return 0; }
    virtual char **E() { return 0; }
    virtual char **F() { return 0; }
    virtual char **G() { return 0; }
    virtual char **H() { return 0; }
    virtual char **I() { return 0; }
    virtual char **J() { return 0; }
    virtual char **K() { return 0; }
    virtual char **L() { return 0; }
    virtual char **M() { return 0; }
    virtual char **N() { return 0; }
    virtual char **O() { return 0; }
    virtual char **P() { return 0; }
    virtual char **Q() { return 0; }
    virtual char **R() { return 0; }
    virtual char **S() { return 0; }
    virtual char **T() { return 0; }
    virtual char **U() { return 0; }
    virtual char **V() { return 0; }
    virtual char **W() { return 0; }
    virtual char **X() { return 0; }
    virtual char **Y() { return 0; }
    virtual char **Z() { return 0; }

    // Digits
    virtual char **zero() { return 0; }
    virtual char **one() { return 0; }
    virtual char **two() { return 0; }
    virtual char **three() { return 0; }
    virtual char **four() { return 0; }
    virtual char **five() { return 0; }
    virtual char **six() { return 0; }
    virtual char **seven() { return 0; }
    virtual char **eight() { return 0; }
    virtual char **nine() { return 0; }

    /****************************** end of glyph hooks ******************************/

    // Virtual so that derived fonts are destroyed correctly through a Fonts*.
    virtual ~Fonts()
    {
        destroyspace();
    }
};

class SevenStar : public Fonts
{
	static const int rows = 7;
	static const int cols = 7;

public:
	// Passes this font's glyph size (rows x cols, both 7) to Fonts as the default grid size.
	SevenStar() : Fonts(rows, cols) {}

	// Glyph 'A': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **A()
	{
		static const char *const bitmap[7] = {
			"  AAA  ",
			" AA AA ",
			"AA   AA",
			"AA   AA",
			"AAAAAAA",
			"AA   AA",
			"AA   AA"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'B': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **B()
	{
		static const char *const bitmap[7] = {
			"BBBBB  ",
			"BB   BB",
			"BB   BB",
			"BBBBBB ",
			"BB   BB",
			"BB   BB",
			"BBBBB  "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'C': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **C()
	{
		static const char *const bitmap[7] = {
			" CC CC ",
			"CC   CC",
			"CC     ",
			"CC     ",
			"CC     ",
			"CC   CC",
			" CC CC "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'D': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **D()
	{
		static const char *const bitmap[7] = {
			"DDD DD ",
			" DD  DD",
			" DD  DD",
			" DD  DD",
			" DD  DD",
			" DD  DD",
			"DDD DD "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'E': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **E()
	{
		static const char *const bitmap[7] = {
			"EEEEEEE",
			"EE   EE",
			"EE     ",
			"EEEEEE ",
			"EE     ",
			"EE   EE",
			"EEEEEEE"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'F': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **F()
	{
		static const char *const bitmap[7] = {
			"FFFFFFF",
			"FF     ",
			"FF     ",
			"FFFFFF ",
			"FF     ",
			"FF     ",
			"FF     "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'G': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **G()
	{
		static const char *const bitmap[7] = {
			" GGGGG ",
			"GG   GG",
			"GG     ",
			"GG  GGG",
			"GG   GG",
			"GG   GG",
			" GGGGG "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'H': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **H()
	{
		static const char *const bitmap[7] = {
			"HH   HH",
			"HH   HH",
			"HH   HH",
			"HHHHHHH",
			"HH   HH",
			"HH   HH",
			"HH   HH"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'I': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **I()
	{
		static const char *const bitmap[7] = {
			"IIIIIII",
			"  III  ",
			"  III  ",
			"  III  ",
			"  III  ",
			"  III  ",
			"IIIIIII"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'J': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **J()
	{
		static const char *const bitmap[7] = {
			"   JJJJ",
			"    JJ ",
			"    JJ ",
			"    JJ ",
			"JJ  JJ ",
			"JJ  JJ ",
			" JJJJ  "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'K': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **K()
	{
		static const char *const bitmap[7] = {
			"KK   KK",
			"KK  KK ",
			"KK KK  ",
			"KKKK   ",
			"KK KK  ",
			"KK  KK ",
			"KK   KK"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'L': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **L()
	{
		static const char *const bitmap[7] = {
			"LL     ",
			"LL     ",
			"LL     ",
			"LL     ",
			"LL     ",
			"LL     ",
			"LLLLLLL"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'M': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **M()
	{
		static const char *const bitmap[7] = {
			"MM   MM",
			"MMM MMM",
			"MMMMMMM",
			"MM M MM",
			"MM   MM",
			"MM   MM",
			"MM   MM"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'N': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **N()
	{
		static const char *const bitmap[7] = {
			"NN   NN",
			"NNN  NN",
			"NNN  NN",
			"NN N NN",
			"NN  NNN",
			"NN  NNN",
			"NN   NN"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'O': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **O()
	{
		static const char *const bitmap[7] = {
			" OOOOO ",
			"OO   OO",
			"OO   OO",
			"OO   OO",
			"OO   OO",
			"OO   OO",
			" OOOOO "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'P': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **P()
	{
		static const char *const bitmap[7] = {
			"PPPPPP ",
			"PP   PP",
			"PP   PP",
			"PPPPPP ",
			"PP     ",
			"PP     ",
			"PP     "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'Q': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **Q()
	{
		static const char *const bitmap[7] = {
			" QQ QQ ",
			"QQ   QQ",
			"QQ   QQ",
			"QQ   QQ",
			"QQ   QQ",
			"QQ  QQ ",
			" QQ  QQ"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'R': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **R()
	{
		static const char *const bitmap[7] = {
			"RRRRRR ",
			"RR   RR",
			"RR   RR",
			"RRRRRR ",
			"RR  RR ",
			"RR   RR",
			"RR   RR"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'S': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **S()
	{
		static const char *const bitmap[7] = {
			" SSSSS ",
			"SS   SS",
			"SSSS   ",
			" SSSSS ",
			"    SSS",
			"SS   SS",
			" SSSSS "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'T': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **T()
	{
		static const char *const bitmap[7] = {
			"TTTTTTT",
			"  TT   ",
			"  TT   ",
			"  TT   ",
			"  TT   ",
			"  TT   ",
			"  TT   "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'U': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **U()
	{
		static const char *const bitmap[7] = {
			"UU   UU",
			"UU   UU",
			"UU   UU",
			"UU   UU",
			"UU   UU",
			"UU   UU",
			" UUUUU "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'V': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **V()
	{
		static const char *const bitmap[7] = {
			"VV   VV",
			"VV   VV",
			"VV   VV",
			"VV   VV",
			"VV   VV",
			" VV VV ",
			"  VVV  "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'W': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **W()
	{
		static const char *const bitmap[7] = {
			"WW   WW",
			"WW   WW",
			"WW   WW",
			"WW W WW",
			"WWWWWWW",
			"WWW WWW",
			"WW   WW"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'X': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **X()
	{
		static const char *const bitmap[7] = {
			"XX  XX ",
			"XX  XX ",
			" XXXX  ",
			"  XX   ",
			" XXXX  ",
			"XX  XX ",
			"XX  XX "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'Y': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **Y()
	{
		static const char *const bitmap[7] = {
			"YY  YY ",
			"YY  YY ",
			"YY  YY ",
			" YYYY  ",
			"  YY   ",
			"  YY   ",
			"  YY   "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'Z': fills a grid from getCharGrid() with the 7x7 bitmap below.
	char **Z()
	{
		static const char *const bitmap[7] = {
			" ZZZZZZ",
			"     ZZ",
			"    ZZ ",
			"   ZZ  ",
			"  ZZ   ",
			" ZZ    ",
			" ZZZZZZ"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// small letter
	// Glyph 'a': the 'A' shape drawn with '%', copied into a grid from getCharGrid().
	char **a()
	{
		static const char *const bitmap[7] = {
			"  %%%  ",
			" %% %% ",
			"%%   %%",
			"%%   %%",
			"%%%%%%%",
			"%%   %%",
			"%%   %%"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'b': the 'B' shape drawn with '%', copied into a grid from getCharGrid().
	char **b()
	{
		static const char *const bitmap[7] = {
			"%%%%%  ",
			"%%   %%",
			"%%   %%",
			"%%%%%% ",
			"%%   %%",
			"%%   %%",
			"%%%%%  "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'c': the 'C' shape drawn with '%', copied into a grid from getCharGrid().
	char **c()
	{
		static const char *const bitmap[7] = {
			" %% %% ",
			"%%   %%",
			"%%     ",
			"%%     ",
			"%%     ",
			"%%   %%",
			" %% %% "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'd': the 'D' shape drawn with '%', copied into a grid from getCharGrid().
	char **d()
	{
		static const char *const bitmap[7] = {
			"%%% %% ",
			" %%  %%",
			" %%  %%",
			" %%  %%",
			" %%  %%",
			" %%  %%",
			"%%% %% "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'e': the 'E' shape drawn with '%', copied into a grid from getCharGrid().
	char **e()
	{
		static const char *const bitmap[7] = {
			"%%%%%%%",
			"%%   %%",
			"%%     ",
			"%%%%%% ",
			"%%     ",
			"%%   %%",
			"%%%%%%%"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'f': the 'F' shape drawn with '%', copied into a grid from getCharGrid().
	char **f()
	{
		static const char *const bitmap[7] = {
			"%%%%%%%",
			"%%     ",
			"%%     ",
			"%%%%%% ",
			"%%     ",
			"%%     ",
			"%%     "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'g': the 'G' shape drawn with '%', copied into a grid from getCharGrid().
	char **g()
	{
		static const char *const bitmap[7] = {
			" %%%%% ",
			"%%   %%",
			"%%     ",
			"%%  %%%",
			"%%   %%",
			"%%   %%",
			" %%%%% "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'h': the 'H' shape drawn with '%', copied into a grid from getCharGrid().
	char **h()
	{
		static const char *const bitmap[7] = {
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%%%%%%%",
			"%%   %%",
			"%%   %%",
			"%%   %%"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'i': the 'I' shape drawn with '%', copied into a grid from getCharGrid().
	char **i()
	{
		static const char *const bitmap[7] = {
			"%%%%%%%",
			"  %%%  ",
			"  %%%  ",
			"  %%%  ",
			"  %%%  ",
			"  %%%  ",
			"%%%%%%%"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'j': the 'J' shape drawn with '%', copied into a grid from getCharGrid().
	char **j()
	{
		static const char *const bitmap[7] = {
			"   %%%%",
			"    %% ",
			"    %% ",
			"    %% ",
			"%%  %% ",
			"%%  %% ",
			" %%%%  "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'k': the 'K' shape drawn with '%', copied into a grid from getCharGrid().
	char **k()
	{
		static const char *const bitmap[7] = {
			"%%   %%",
			"%%  %% ",
			"%% %%  ",
			"%%%%   ",
			"%% %%  ",
			"%%  %% ",
			"%%   %%"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'l': the 'L' shape drawn with '%', copied into a grid from getCharGrid().
	char **l()
	{
		static const char *const bitmap[7] = {
			"%%     ",
			"%%     ",
			"%%     ",
			"%%     ",
			"%%     ",
			"%%     ",
			"%%%%%%%"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'm': the 'M' shape drawn with '%', copied into a grid from getCharGrid().
	char **m()
	{
		static const char *const bitmap[7] = {
			"%%   %%",
			"%%% %%%",
			"%%%%%%%",
			"%% % %%",
			"%%   %%",
			"%%   %%",
			"%%   %%"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'n': the 'N' shape drawn with '%', copied into a grid from getCharGrid().
	char **n()
	{
		static const char *const bitmap[7] = {
			"%%   %%",
			"%%%  %%",
			"%%%  %%",
			"%% % %%",
			"%%  %%%",
			"%%  %%%",
			"%%   %%"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}
	// Glyph 'o': the 'O' shape drawn with '%', copied into a grid from getCharGrid().
	char **o()
	{
		static const char *const bitmap[7] = {
			" %%%%% ",
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%%   %%",
			" %%%%% "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'p': the 'P' shape drawn with '%', copied into a grid from getCharGrid().
	char **p()
	{
		static const char *const bitmap[7] = {
			"%%%%%% ",
			"%%   %%",
			"%%   %%",
			"%%%%%% ",
			"%%     ",
			"%%     ",
			"%%     "
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'q': the 'Q' shape drawn with '%', copied into a grid from getCharGrid().
	char **q()
	{
		static const char *const bitmap[7] = {
			" %% %% ",
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%%  %% ",
			" %%  %%"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	// Glyph 'r': the 'R' shape drawn with '%', copied into a grid from getCharGrid().
	char **r()
	{
		static const char *const bitmap[7] = {
			"%%%%%% ",
			"%%   %%",
			"%%   %%",
			"%%%%%% ",
			"%%  %% ",
			"%%   %%",
			"%%   %%"
		};
		char **grid = getCharGrid();
		for (int row = 0; row < 7; row++)
			for (int col = 0; col < 7; col++)
				grid[row][col] = bitmap[row][col];
		return grid;
	}

	/// Glyph for lowercase 's': fills a 7x7 area of a fresh grid from getCharGrid().
	char **s()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			" %%%%% ",
			"%%   %%",
			"%%%%   ",
			" %%%%% ",
			"    %%%",
			"%%   %%",
			" %%%%% "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for lowercase 't': fills a 7x7 area of a fresh grid from getCharGrid().
	char **t()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			"%%%%%%%",
			"  %%   ",
			"  %%   ",
			"  %%   ",
			"  %%   ",
			"  %%   ",
			"  %%   "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for lowercase 'u': fills a 7x7 area of a fresh grid from getCharGrid().
	char **u()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%%   %%",
			" %%%%% "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for lowercase 'v': fills a 7x7 area of a fresh grid from getCharGrid().
	char **v()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%%   %%",
			" %% %% ",
			"  %%%  "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for lowercase 'w': fills a 7x7 area of a fresh grid from getCharGrid().
	char **w()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			"%%   %%",
			"%%   %%",
			"%%   %%",
			"%% % %%",
			"%%%%%%%",
			"%%% %%%",
			"%%   %%"
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for lowercase 'x': fills a 7x7 area of a fresh grid from getCharGrid().
	char **x()
	{
		// One string per scan line, seven columns each (column 6 stays blank).
		static const char *const bitmap[7] = {
			"%%  %% ",
			"%%  %% ",
			" %%%%  ",
			"  %%   ",
			" %%%%  ",
			"%%  %% ",
			"%%  %% "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}
	/// Glyph for lowercase 'y': fills a 7x7 area of a fresh grid from getCharGrid().
	char **y()
	{
		// One string per scan line, seven columns each (column 6 stays blank).
		static const char *const bitmap[7] = {
			"%%  %% ",
			"%%  %% ",
			"%%  %% ",
			" %%%%  ",
			"  %%   ",
			"  %%   ",
			"  %%   "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for lowercase 'z': fills a 7x7 area of a fresh grid from getCharGrid().
	char **z()
	{
		// One string per scan line, seven columns each; the diagonal steps one
		// column to the left on every line from line 1 to line 5.
		static const char *const bitmap[7] = {
			" %%%%%%",
			"     %%",
			"    %% ",
			"   %%  ",
			"  %%   ",
			" %%    ",
			" %%%%%%"
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	// digits
	/// Glyph for digit '0', drawn with '0' characters on a 7x7 area of a fresh grid.
	char **zero()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			" 00 00 ",
			"00   00",
			"00  000",
			"00 0 00",
			"000  00",
			"00   00",
			" 00 00 "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for digit '1', drawn with '1' characters on a 7x7 area of a fresh grid.
	char **one()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			"  11   ",
			" 111   ",
			"  11   ",
			"  11   ",
			"  11   ",
			"  11   ",
			" 1111  "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}
	/// Glyph for digit '2', drawn with '2' characters on a 7x7 area of a fresh grid.
	char **two()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			"22 22  ",
			"22  22 ",
			"    22 ",
			"   22  ",
			"  22   ",
			" 2   22",
			"222222 "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for digit '3', drawn with '3' characters on a 7x7 area of a fresh grid.
	char **three()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			" 33 33 ",
			"33   33",
			"     33",
			"   333 ",
			"     33",
			"33   33",
			" 33 33 "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for digit '4', drawn with '4' characters on a 7x7 area of a fresh grid.
	char **four()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			"    44 ",
			"  4 44 ",
			" 44 44 ",
			"44  44 ",
			"444 444",
			"    44 ",
			"    44 "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for digit '5', drawn with '5' characters on a 7x7 area of a fresh grid.
	char **five()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			"555555 ",
			"55     ",
			"55 55  ",
			"    55 ",
			"55  55 ",
			"55  55 ",
			"  555  "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for digit '6', drawn with '6' characters on a 7x7 area of a fresh grid.
	char **six()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			" 66 666",
			"66   66",
			"66     ",
			"66 666 ",
			"66   66",
			"66   66",
			" 66 66 "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for digit '7', drawn with '7' characters on a 7x7 area of a fresh grid.
	char **seven()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			"777777 ",
			"77   7 ",
			"   77  ",
			"  77   ",
			"  77   ",
			"  77   ",
			"  77   "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for digit '8', drawn with '8' characters on a 7x7 area of a fresh grid.
	char **eight()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			" 88 88 ",
			"88   88",
			"88   88",
			" 88 88 ",
			"88   88",
			"88   88",
			" 88 88 "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	/// Glyph for digit '9', drawn with '9' characters on a 7x7 area of a fresh grid.
	char **nine()
	{
		// One string per scan line, seven columns each.
		static const char *const bitmap[7] = {
			" 99 99 ",
			" 9   99",
			"99   99",
			" 99 999",
			"     99",
			"99   99",
			" 99 99 "
		};
		char **grid = getCharGrid();
		for (int line = 0; line < 7; line++)
			for (int col = 0; col < 7; col++)
				grid[line][col] = bitmap[line][col];
		return grid;
	}

	 // for space
     char **space()
    {
        // The blank glyph is requested as getCharGrid(1,1) and holds a single ' ' cell.
        char **blank = getCharGrid(1,1);
        blank[0][0] = ' ';
        return blank;
    }
};


namespace ascii
{

    /// Prints text as large banner letters by collecting one glyph per character
    /// from a Fonts implementation and then asking the font to print them.
    class Ascii
    {

    public:
        /// Glyph provider. Allocated in the constructor; this class never deletes it.
        Fonts *font;

        /// Creates the printer with the SevenStar font.
        Ascii()
        {
            this->font = new SevenStar();
        }

        /// Renders @p text: every supported character (A-Z, a-z, 0-9, space) is
        /// turned into a glyph and handed to font->pushChar(); font->printvector()
        /// is then called once.
        ///
        /// Characters without a glyph are skipped. The glyph pointer is reset for
        /// every character, so an unsupported character can no longer re-push the
        /// previous glyph (or a null pointer when it is the first character).
        void print(const std::string &text)
        {
            for (size_t i = 0; i < text.size(); i++)
            {
                char **character = 0;

                switch (text[i])
                {
                // Uppercase alphabets
                case 'A': character = font->A(); break;
                case 'B': character = font->B(); break;
                case 'C': character = font->C(); break;
                case 'D': character = font->D(); break;
                case 'E': character = font->E(); break;
                case 'F': character = font->F(); break;
                case 'G': character = font->G(); break;
                case 'H': character = font->H(); break;
                case 'I': character = font->I(); break;
                case 'J': character = font->J(); break;
                case 'K': character = font->K(); break;
                case 'L': character = font->L(); break;
                case 'M': character = font->M(); break;
                case 'N': character = font->N(); break;
                case 'O': character = font->O(); break;
                case 'P': character = font->P(); break;
                case 'Q': character = font->Q(); break;
                case 'R': character = font->R(); break;
                case 'S': character = font->S(); break;
                case 'T': character = font->T(); break;
                case 'U': character = font->U(); break;
                case 'V': character = font->V(); break;
                case 'W': character = font->W(); break;
                case 'X': character = font->X(); break;
                case 'Y': character = font->Y(); break;
                case 'Z': character = font->Z(); break;

                // Lowercase alphabets
                case 'a': character = font->a(); break;
                case 'b': character = font->b(); break;
                case 'c': character = font->c(); break;
                case 'd': character = font->d(); break;
                case 'e': character = font->e(); break;
                case 'f': character = font->f(); break;
                case 'g': character = font->g(); break;
                case 'h': character = font->h(); break;
                case 'i': character = font->i(); break;
                case 'j': character = font->j(); break;
                case 'k': character = font->k(); break;
                case 'l': character = font->l(); break;
                case 'm': character = font->m(); break;
                case 'n': character = font->n(); break;
                case 'o': character = font->o(); break;
                case 'p': character = font->p(); break;
                case 'q': character = font->q(); break;
                case 'r': character = font->r(); break;
                case 's': character = font->s(); break;
                case 't': character = font->t(); break;
                case 'u': character = font->u(); break;
                case 'v': character = font->v(); break;
                case 'w': character = font->w(); break;
                case 'x': character = font->x(); break;
                case 'y': character = font->y(); break;
                case 'z': character = font->z(); break;

                // Numbers
                case '0': character = font->zero(); break;
                case '1': character = font->one(); break;
                case '2': character = font->two(); break;
                case '3': character = font->three(); break;
                case '4': character = font->four(); break;
                case '5': character = font->five(); break;
                case '6': character = font->six(); break;
                case '7': character = font->seven(); break;
                case '8': character = font->eight(); break;
                case '9': character = font->nine(); break;

                // for space
                case ' ': character = font->space(); break;

                // No glyph for this character: leave the pointer null.
                default: break;
                }

                if (character != 0)
                    font->pushChar(character);
            }
            font->printvector();
            // font->destroyspace();
        }
    };
} // namespace ascii

/// LICENSE_END.23

/// Running byte counter adjusted by franz_malloc() and franz_extend() (bookkeeping only).
int64_t g_allocatedram=0;
/// NOTE(review): not referenced in this part of the file; presumably a byte
/// counter for array storage - confirm against its users.
int64_t g_arrayram=0;

/// Formats a non-negative number with '.' as thousands separator
/// (e.g. 1234567 -> "1.234.567") into a caller-supplied buffer.
///
/// The digits are written right-aligned, working backwards from the end of the
/// buffer, so the returned pointer normally points INSIDE i_buffer, not at its
/// start. For negative input the literal text "negative" is written at the
/// start of the buffer and i_buffer itself is returned.
///
/// @param i_bytes      value to format
/// @param i_buffer     destination buffer (must not be NULL)
/// @param i_buffersize size of i_buffer in bytes (at least 10; 26 are needed for
///                     the largest int64_t including the terminator)
/// @return pointer to the NUL-terminated text inside i_buffer
///
/// On a NULL buffer, or a buffer too small for the text, a "guru" message is
/// printed and the process exits (same policy as the pre-existing checks).
char* mymigliaia(int64_t i_bytes,char* i_buffer,int i_buffersize)
{
	if (i_buffer==NULL)
	{
		printf("02096: guru i_buffer null\n");
		exit(0);
	}
	if (i_buffersize<10)
	{
		printf("02106: guru buffer too small\n");
		exit(0);
	}
	if (i_bytes<0)
	{
		snprintf(i_buffer,10,"negative");
		return i_buffer;
	}

	char *p=&i_buffer[i_buffersize-1];
	unsigned int i=0;
	*p='\0';
	do
	{
		// A separator precedes every group of three digits except the first.
		const int needed=((i%3==0) && (i!=0)) ? 2 : 1;
		// Never step before the start of the buffer: the size>=10 check above
		// does not cover large values (up to 25 characters plus terminator).
		if ((p-i_buffer)<needed)
		{
			printf("02106: guru buffer too small\n");
			exit(0);
		}
		if (needed==2)
			*--p='.';
		*--p=(char)('0'+i_bytes%10);
		i_bytes/=10;
		i++;
	} while(i_bytes!=0);
	return p;
}

/// very quick and very dirty output
/// Thousands-separated text for n, stored in this function's own static buffer.
/// The result is overwritten by the next call to this same function.
inline char* migliaia(int64_t n)
{
	static char s_text[30];
	return mymigliaia(n,s_text,(int)sizeof(s_text));
}
/// Same as migliaia(), but with a separate static buffer so that both results
/// can be used in a single output call.
inline char* migliaia2(int64_t n)
{
	static char s_text[30];
	return mymigliaia(n,s_text,(int)sizeof(s_text));
}
/// Third independent static buffer for thousands-separated output.
inline char* migliaia3(int64_t n)
{
	static char s_text[30];
	return mymigliaia(n,s_text,(int)sizeof(s_text));
}
/// Fourth independent static buffer for thousands-separated output.
inline char* migliaia4(int64_t n)
{
	static char s_text[30];
	return mymigliaia(n,s_text,(int)sizeof(s_text));
}
/// Fifth independent static buffer for thousands-separated output.
inline char* migliaia5(int64_t n)
{
	static char s_text[30];
	return mymigliaia(n,s_text,(int)sizeof(s_text));
}
/// Sixth independent static buffer for thousands-separated output.
inline char* migliaia6(int64_t n)
{
	static char s_text[30];
	return mymigliaia(n,s_text,(int)sizeof(s_text));
}

/// LICENSE_START.7
/// OK we need a fix for 64-byte-align problem on some Linux compiler
/*
https://github.com/embeddedartistry/embedded-resources/blob/master/examples/c/malloc_aligned.c
*/
/* Rounds num up to the next multiple of align; align must be a power of two. */
#ifndef align_up
#define align_up(num, align) \
	(((num) + ((align) - 1)) & ~((align) - 1))
#endif
//Convenience macro for memalign, the linux API
///#define memalign(align, size) aligned_malloc(align, size)
//Number of bytes we're using for storing the aligned pointer offset
typedef uint16_t myoffset_t;
#define PTR_OFFSET_SZ sizeof(myoffset_t)
/**
* aligned_malloc takes in the requested alignment and size.
*	malloc is called with extra bytes for the offset header and for the slack
*	required to guarantee the desired alignment. The distance from the
*	malloc'ed pointer to the returned pointer is stored in the myoffset_t
*	located immediately before the returned pointer (read by aligned_free).
*
* @param align  alignment in bytes: a power of two, at least sizeof(void*),
*               and small enough that the stored offset fits in myoffset_t
* @param size   number of usable bytes requested (must be non-zero)
* @return aligned pointer, or NULL on invalid arguments, size overflow or
*         allocation failure. Release with aligned_free(), never free().
*/
void * aligned_malloc(size_t align, size_t size)
{
	//We want it to be a power of two since align_up operates on powers of two
	if ((align & (align - 1)) != 0 || align < sizeof(void*))
		return NULL;
	if (size == 0)
		return NULL;

	/*
	 * We know we have to fit an offset value
	 * We also allocate extra bytes to ensure we can meet the alignment
	 */
	const size_t hdr_size = PTR_OFFSET_SZ + (align - 1);

	// The stored offset ranges from PTR_OFFSET_SZ to hdr_size: refuse
	// alignments whose offset cannot be represented in myoffset_t.
	if (hdr_size > (size_t)((myoffset_t)-1))
		return NULL;
	// Refuse requests whose total size would wrap around.
	if (size > (size_t)-1 - hdr_size)
		return NULL;

	void * p = malloc(size + hdr_size);
	if (p == NULL)
		return NULL;

	/*
	 * Add the offset size to malloc's pointer (we will always store that)
	 * Then align the resulting value to the target alignment
	 */
	void * ptr = (void *) align_up(((uintptr_t)p + PTR_OFFSET_SZ), align);

	//Calculate the offset and store it behind our aligned pointer
	*((myoffset_t *)ptr - 1) = (myoffset_t)((uintptr_t)ptr - (uintptr_t)p);
	return ptr;
}
/**
* aligned_free works like free(), but we work backwards from the returned
* pointer to find the correct offset and pointer location to return to free()
* Note that it is VERY BAD to call free() on an aligned_malloc() pointer.
*/
void aligned_free(void * ptr)
{
	///return;
	if (flagdebug5)
		myprintf("72252: aligned_free [1]\n");
	
	if (!ptr) return;
	if (flagdebug5)
		myprintf("72254: aligned_free [2]\n");
	
	//assert(ptr);
	/*
	* Walk backwards from the passed-in pointer to get the pointer offset
	* We convert to an offset_t pointer and rely on pointer math to get the data
	*/
	myoffset_t offset = *((myoffset_t *)ptr - 1);
	if (flagdebug5)
		myprintf("72254: aligned_free [3]\n");
	/*
	* Once we have the offset, we can get our original pointer and call free
	*/
	void * p = (void *)((uint8_t *)ptr - offset);
	if (flagdebug5)
		myprintf("72254: aligned_free [4] ptr %s  offset %s voidp %s\n",migliaia(int64_t(ptr)),migliaia2(int64_t(offset)),migliaia3(int64_t(p)));
	free(p);
	if (flagdebug5)
		myprintf("72254: aligned_free [5]\n");
}
/// LICENSE_END.7


/*
	Let's try to force alignment everywhere, for some kinds of "choosy" CPUs.
	I do not have that kind of hardware ("iron"), so this is just an attempt...
*/
/// Allocation wrapper: uses aligned_malloc(MALLOC_ALIGN, ...) when MALLOC_ALIGN
/// is defined, plain malloc otherwise.
///
/// @param i_size number of bytes; zero is a programming error (asserts, and in
///               builds without assert prints a message and exits)
/// @return the new block, or NULL when the underlying allocator fails
///
/// g_allocatedram is increased by i_size only when the allocation succeeded,
/// so failed requests no longer inflate the counter.
static void* franz_malloc(size_t i_size)
{
	assert(i_size>0);
	if  (i_size==0)
	{
		myprintf("06112: franz_malloc i_size zero!\n");
		exit(0);
	}

#ifdef MALLOC_ALIGN
	void* block=aligned_malloc(MALLOC_ALIGN,i_size);
#else
	void* block=malloc(i_size);
#endif
	if (block!=NULL)
		g_allocatedram+=i_size;
	return block;
}

/// Counterpart of franz_malloc(): releases i_mem with aligned_free() when
/// MALLOC_ALIGN is defined, with free() otherwise. NULL is ignored.
static void franz_free(void* i_mem)
{
	if (i_mem!=NULL)
	{
#ifdef MALLOC_ALIGN
		aligned_free(i_mem);
#else
		free(i_mem);
#endif
	}
}

/// realloc-style helper that keeps g_allocatedram up to date.
///
/// @param i_mem     block to resize (may be NULL)
/// @param i_size    requested new size in bytes
/// @param i_oldsize size the block currently has; used for the byte accounting
///                  and, in the aligned build, for copying the old contents
/// @return the resized block, or NULL
///
/// With MALLOC_ALIGN defined: growing allocates a new aligned block, copies
/// i_oldsize bytes and frees the old one; shrinking returns i_mem unchanged;
/// i_size==0 frees i_mem and returns NULL.
/// Without MALLOC_ALIGN the call is forwarded to realloc().
static void* franz_extend(void* i_mem,size_t i_size,size_t i_oldsize)
{
#ifdef MALLOC_ALIGN
    void *new_mem;
	if (i_size==0)
	{
///#ifdef DEBUG
		printf("01200: realloc i_size == 0\n");
///#endif
		franz_free(i_mem);
		// The block is gone: take it out of the accounting, as the realloc path does.
		if (i_mem!=NULL)
			g_allocatedram-=i_oldsize;
		return NULL;
	}
    if (!i_mem)
	{
///#ifdef DEBUG
		printf("01205: realloc from empty, allocating %08d\n",(int)i_size);
///#endif
		// franz_malloc does the accounting for the new block.
        return franz_malloc(i_size);
	}
	if (i_oldsize<i_size)
	{
///#ifdef DEBUG
		if (flagdebug)
			myprintf("01211: realloc from %08d to %08d\n",(int)i_oldsize,(int)i_size);
///#endif
        new_mem=franz_malloc(i_size);
        if (new_mem==NULL)
			return NULL;
		memcpy(new_mem,i_mem,i_oldsize);
		franz_free(i_mem);
		// franz_malloc already added i_size; remove the released block.
		g_allocatedram-=i_oldsize;
		return new_mem;
	}
///#ifdef DEBUG
	printf("01219: realloc smaller, do nothing\n");
///#endif
	return i_mem;

#else
	// Account for exactly the size difference. The former "i_oldsize++"
	// warning-silencer made the counter drift by one byte per call.
	g_allocatedram+=i_size;
	g_allocatedram-=i_oldsize;
	return realloc(i_mem,i_size);
#endif
}

/// Reverses a NUL-terminated string in place.
/// NULL, empty and one-character strings are left untouched.
/// The length is kept as size_t (no narrowing to unsigned/int), and the empty
/// string no longer depends on an unsigned-to-int wraparound.
void mystrrev(char *i_str)
{
	if (i_str==NULL)
		return;
	const size_t len=strlen(i_str);
	if (len<2)
		return;
	char *left=i_str;
	char *right=i_str+(len-1);
	// Swap the outermost pair and move both cursors towards the middle.
	while (left<right)
	{
		const char a=*left;
		*left=*right;
		*right=a;
		left++;
		right--;
	}
}
/// Converts an unsigned 64-bit value to decimal text, optionally left-padded
/// with '0'.
///
/// @param value number to convert
/// @param i_len minimum width; values above 19 are clamped to 19, values <= 0
///              mean no padding. Longer numbers are never truncated.
/// @return the decimal representation
///
/// The digit buffer holds 20 digits plus the terminator, so values from
/// 10^19 up to 2^64-1 no longer overrun it.
std::string myulltoa(uint64_t value,int i_len)
{
	if (i_len>19)
		i_len=19;
	char digits[21];
	// Fill from the end backwards: the text comes out in reading order.
	char *cursor=&digits[sizeof(digits)-1];
	*cursor='\0';
	do
	{
		*--cursor=(char)('0'+(value%10));
		value/=10;
	} while (value!=0);
	std::string risultato=cursor;
	if (i_len>0)
		if (risultato.size()<(unsigned int)i_len)
			risultato=std::string(i_len-risultato.size(), '0') + risultato;
	return risultato;
}

/// Returns the value as exactly 8 uppercase hexadecimal digits,
/// most significant digit first.
std::string bin2hex_32(uint32_t i_thenumber)
{
	static const char hexdigits[] = "0123456789ABCDEF";
	std::string testo(8,'0');
	// Peel off one nibble at a time, starting from the least significant.
	for (int posizione=7;posizione>=0;posizione--)
	{
		testo[posizione]=hexdigits[i_thenumber&15];
		i_thenumber>>=4;
	}
	return testo;
}

/// Returns the value as exactly 16 uppercase hexadecimal digits,
/// most significant digit first.
std::string bin2hex_64(uint64_t i_thenumber)
{
	static const char hexdigits[] = "0123456789ABCDEF";
	std::string testo(16,'0');
	// Peel off one nibble at a time, starting from the least significant.
	for (int posizione=15;posizione>=0;posizione--)
	{
		testo[posizione]=hexdigits[i_thenumber&15];
		i_thenumber>>=4;
	}
	return testo;
}


/// Returns a 128-bit value, given as two 64-bit halves, as 32 uppercase
/// hexadecimal digits: the digits of i_high followed by those of i_low.
std::string bin2hex_128(uint64_t i_high,uint64_t i_low)
{
	std::string testo=bin2hex_64(i_high);
	testo+=bin2hex_64(i_low);
	return testo;
}





#ifdef _WIN32
/// LICENSE_START.22

/// A "stripped" LZ4 

#define LZ4_HEAPMODE 0
#define LZ4_ACCELERATION_DEFAULT 1
#define LZ4_ACCELERATION_MAX 65537
#define LZ4_MEMORY_USAGE_MIN 10   //1KB
#define LZ4_MEMORY_USAGE_DEFAULT 14 /// 16KB
#define LZ4_MEMORY_USAGE_MAX 20
#define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT
#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize))  /* for static allocation; maxBlockSize presumed valid */
#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize)          (((compressedSize) >> 8) + 32)
#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)   ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize))  /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
#define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
#define LZ4_COMPRESS_INPLACE_MARGIN                           (LZ4_DISTANCE_MAX + 32)   /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
#define LZ4_FORCE_O2
#define LZ4_ALIGN_TEST 1
#define LZ4_STREAMDECODE_MINSIZE 32
#define LZ4_STREAM_MINSIZE  ((1UL << (LZ4_MEMORY_USAGE)) + 32)  /* static size, for inter-version compatibility */

#define MINMATCH 4
#define WILDCOPYLENGTH 8
#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
#define FASTLOOP_SAFE_DISTANCE 64
static const int LZ4_minLength = (MFLIMIT+1);

#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#define LZ4_DISTANCE_ABSOLUTE_MAX 65535

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)

#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
#define DEBUGLOG(l, ...) {}    /* disabled */

#define LZ4_FORCE_MEMORY_ACCESS 1

/* Forward declarations of the stream state unions, plus short integer aliases. */
typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */
typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* tracking context */
typedef  int8_t  LZ4_i8;
typedef uint8_t  LZ4_byte;
typedef uint16_t LZ4_u16;
typedef uint32_t LZ4_u32;

/* Prototypes of the LZ4 entry points kept in this stripped copy; sizes and capacities are in bytes, passed as int. */
int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode,
                        const char* src, char* dst,
                        int srcSize, int dstCapacity);


/* Compression stream state; embedded in union LZ4_stream_u as internal_donotuse. */
typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
struct LZ4_stream_t_internal {
    LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];   /* LZ4_HASH_SIZE_U32 32-bit slots */
    const LZ4_byte* dictionary;
    const LZ4_stream_t_internal* dictCtx;   /* pointer to another stream state of the same type */
    LZ4_u32 currentOffset;
    LZ4_u32 tableType;
    LZ4_u32 dictSize;
    /* Implicit padding to ensure structure is aligned */
};

/* Public stream handle: at least LZ4_STREAM_MINSIZE bytes of storage overlaid with the internal state. */
union LZ4_stream_u {
    char minStateSize[LZ4_STREAM_MINSIZE];   /* fixes the minimum size of the union */
    LZ4_stream_t_internal internal_donotuse;
}; /* previously typedef'd to LZ4_stream_t */

/* Takes a caller-provided buffer and its size; returns an LZ4_stream_t pointer (definition not in this part of the file). */
LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);

/* Decompression stream state; embedded in union LZ4_streamDecode_u as internal_donotuse. */
typedef struct {
    const LZ4_byte* externalDict;
    const LZ4_byte* prefixEnd;
    size_t extDictSize;
    size_t prefixSize;
} LZ4_streamDecode_t_internal;


/* Public decode handle: at least LZ4_STREAMDECODE_MINSIZE bytes of storage overlaid with the internal state. */
union LZ4_streamDecode_u {
    char minStateSize[LZ4_STREAMDECODE_MINSIZE];   /* fixes the minimum size of the union */
    LZ4_streamDecode_t_internal internal_donotuse;
} ;   /* previously typedef'd to LZ4_streamDecode_t */


/*
 * LZ4_FORCE_INLINE: function prefix that requests inlining as strongly as the
 * compiler allows. MSVC gets __forceinline, GCC-compatible compilers get
 * always_inline, other C++/C99 compilers plain "static inline", anything else
 * just "static". A definition supplied before this point is kept.
 */
#ifndef LZ4_FORCE_INLINE
#  ifdef _MSC_VER    /* Visual Studio */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#      ifdef __GNUC__
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif  /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */


/* Branch-prediction hints.
 * expect(expr,value) maps to __builtin_expect on GCC >= 3, Intel >= 8.00
 * and clang, and degrades to the bare expression elsewhere.
 * likely()/unlikely() first normalise the expression to 0/1 with `!= 0`;
 * they are only defined here if the including code has not already
 * provided macros of the same name. */
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif

#ifndef likely
#define likely(expr)     expect((expr) != 0, 1)
#endif
#ifndef unlikely
#define unlikely(expr)   expect((expr) != 0, 0)
#endif

/* Memory-management indirection: the LZ4 code goes through these macros
 * rather than calling the C library directly.
 * ALLOC_AND_ZERO returns zero-filled memory (calloc of one element of
 * s bytes); MEM_INIT is a plain alias of LZ4_memset. */
#define ALLOC(s)          malloc(s)
#define ALLOC_AND_ZERO(s) calloc(1,s)
#define FREEMEM(p)        free(p)
#define LZ4_memset(p,v,s) memset((p),(v),(s))
#define MEM_INIT(p,v,s)   LZ4_memset((p),(v),(s))


/* Tells whether the address held in `ptr` is a multiple of `alignment`.
 * Returns 1 when it is, 0 otherwise. The test masks the low address bits
 * with (alignment - 1), so it is only meaningful for power-of-two
 * alignments. */
static int LZ4_isAligned(const void* ptr, size_t alignment)
{
    size_t const address = (size_t)ptr;
    size_t const lowBitsMask = alignment - 1;

    if ((address & lowBitsMask) != 0) {
        return 0;
    }
    return 1;
}

/* Short fixed-width aliases used throughout the LZ4 implementation. */
typedef  uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef  int32_t S32;
typedef uint64_t U64;
typedef uintptr_t uptrval;

/* reg_t: the word type used for word-at-a-time comparisons and hashing.
 * It is forced to 64 bits whenever __x86_64__ is defined (which includes
 * the x32 ABI, where size_t is only 32 bits wide) and follows size_t on
 * every other target. */
#if defined(__x86_64__)
  typedef U64    reg_t;   /* 64-bits in x32 mode */
#else
  typedef size_t reg_t;   /* 32-bits in x32 mode */
#endif

/* How the compressor treats the destination capacity:
 *   notLimited    - the value is 0, so the `if (outputDirective)` capacity
 *                   checks are skipped entirely;
 *   limitedOutput - the compressor returns 0 as soon as the next sequence
 *                   would not fit in the destination;
 *   fillOutput    - the compressor stops consuming input when the
 *                   destination is about to be full and emits what fits. */
typedef enum {
    notLimited = 0,
    limitedOutput = 1,
    fillOutput = 2
} limitedOutput_directive;


/* LZ4_memcpy / LZ4_memmove: copy primitives used by the codec.
 * Both can be pre-defined by the build. By default GCC-compatible
 * compilers (version 4 and up) get the __builtin_ forms and every other
 * compiler gets the C library functions. */
#if !defined(LZ4_memcpy)
#  if defined(__GNUC__) && (__GNUC__ >= 4)
#    define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#  else
#    define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#  endif
#endif

#if !defined(LZ4_memmove)
#  if defined(__GNUC__) && (__GNUC__ >= 4)
#    define LZ4_memmove __builtin_memmove
#  else
#    define LZ4_memmove memmove
#  endif
#endif

/* Reports the byte order of the machine the code is running on.
 *
 * Returns 1 on a little-endian host and 0 on a big-endian one.
 *
 * The answer must be real, not assumed: LZ4_readLE16()/LZ4_writeLE16(),
 * LZ4_NbCommonBytes() and LZ4_hash5() all branch on it, and taking the
 * little-endian path on a big-endian CPU produces wrong match offsets
 * and match lengths. */
static unsigned LZ4_isLittleEndian(void)
{
    /* Deliberately a plain automatic constant (not static): the compiler
     * can then fold the whole test at build time. Reading an object through
     * an unsigned char pointer is valid aliasing in both C and C++. */
    const uint32_t one = 1;
    return *(const unsigned char*)&one;
}

/* LZ4_PACK(decl): wraps a declaration so that it is laid out with
 * 1-byte packing, using the GCC/Intel attribute or the MSVC pack pragma.
 * NOTE(review): there is no fallback branch, so on any other compiler
 * LZ4_PACK stays undefined and the typedefs below will not compile. */
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#define LZ4_PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__))
#elif defined(_MSC_VER)
#define LZ4_PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop))
#endif

/* Packed single-member wrappers: the unaligned read/write helpers cast
 * arbitrary addresses to these types to access a 16-bit, 32-bit or
 * register-sized value. */
LZ4_PACK(typedef struct { U16 u16; }) LZ4_unalign16;
LZ4_PACK(typedef struct { U32 u32; }) LZ4_unalign32;
LZ4_PACK(typedef struct { reg_t uArch; }) LZ4_unalignST;

static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign16*)ptr)->u16; }
static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign32*)ptr)->u32; }
static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalignST*)ptr)->uArch; }

static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign16*)memPtr)->u16 = value; }
static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign32*)memPtr)->u32 = value; }



static U16 LZ4_readLE16(const void* memPtr)
{
    if (LZ4_isLittleEndian()) {
        return LZ4_read16(memPtr);
    } else {
        const BYTE* p = (const BYTE*)memPtr;
        return (U16)((U16)p[0] + (p[1]<<8));
    }
}

/* Stores `value` as a 16-bit little-endian quantity at a possibly
 * unaligned address. Little-endian hosts write it directly; otherwise the
 * two bytes are written one by one, low byte first. */
static void LZ4_writeLE16(void* memPtr, U16 value)
{
    if (!LZ4_isLittleEndian()) {
        BYTE* const bytes = (BYTE*)memPtr;
        bytes[0] = (BYTE) value;
        bytes[1] = (BYTE)(value>>8);
        return;
    }
    LZ4_write16(memPtr, value);
}

/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
LZ4_FORCE_INLINE
void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}

/* Source-pointer adjustments indexed by match offset (0..7).
 * LZ4_memcpy_using_offset_base() adds inc32table[offset] before copying
 * bytes 4..7 of a short-offset match and then subtracts dec64table[offset]
 * (entries may be negative) before switching to 8-byte chunk copies. */
static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};


/* LZ4_FAST_DEC_LOOP: enables (1) or disables (0) the helpers guarded by
 * `#if LZ4_FAST_DEC_LOOP`. The build may pre-define it; otherwise it is
 * switched on for x86/x86-64, for Apple aarch64, and for other aarch64
 * targets unless the compiler is clang, and off everywhere else. */
#ifndef LZ4_FAST_DEC_LOOP
#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && defined(__APPLE__)
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On non-Apple aarch64, we disable this optimization for clang because
      * on certain mobile chipsets, performance is reduced with clang. For
      * more information refer to https://github.com/lz4/lz4/pull/707 */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif

#if LZ4_FAST_DEC_LOOP

/* Copies an overlapping match: the source lies exactly `offset` bytes
 * before the destination (asserted below) and bytes are produced until
 * the write cursor reaches dstEnd.
 * The first 8 bytes are produced here; the rest is delegated to
 * LZ4_wildCopy8(), so writes may run past dstEnd. */
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    assert(srcPtr + offset == dstPtr);
    if (offset < 8) {
        /* Source and destination are closer than one 8-byte chunk: copy the
         * first four bytes one at a time so each write can feed later reads. */
        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
        dstPtr[0] = srcPtr[0];
        dstPtr[1] = srcPtr[1];
        dstPtr[2] = srcPtr[2];
        dstPtr[3] = srcPtr[3];
		
        /* Reposition the source for bytes 4..7, then pull it back. For
         * offsets 1..7 the two table adjustments leave srcPtr a multiple of
         * `offset`, and at least 8 bytes, behind the advanced dstPtr. */
        srcPtr += inc32table[offset];
        LZ4_memcpy(dstPtr+4, srcPtr, 4);
        srcPtr -= dec64table[offset];
        dstPtr += 8;
    } else {
        /* Distance is already >= 8: a straight 8-byte copy is safe. */
        LZ4_memcpy(dstPtr, srcPtr, 8);
        dstPtr += 8;
        srcPtr += 8;
    }

    /* Always runs at least one more 8-byte chunk (do/while in wildCopy8). */
    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
}

/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
 * this version copies two times 16 bytes (instead of one time 32 bytes)
 * because it must be compatible with offsets >= 16. */
LZ4_FORCE_INLINE void
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
}

/* LZ4_memcpy_using_offset() :
 * Copies a match located `offset` bytes before dstPtr, up to dstEnd.
 * Offsets 1, 2 and 4 are handled by building an 8-byte repeating pattern
 * once and stamping it over the destination; every other offset is
 * forwarded to LZ4_memcpy_using_offset_base().
 * Preconditions :
 * - dstEnd >= dstPtr + MINMATCH
 * - at least 8 bytes can be written after dstEnd */
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    BYTE pattern[8];

    assert(dstEnd >= dstPtr + MINMATCH);

    switch(offset) {
    case 1:
        /* one byte repeated eight times */
        MEM_INIT(pattern, *srcPtr, 8);
        break;
    case 2:
        /* two bytes doubled, then the resulting four bytes doubled */
        LZ4_memcpy(pattern, srcPtr, 2);
        LZ4_memcpy(&pattern[2], srcPtr, 2);
#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */
#  pragma warning(push)
#  pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */
#endif
        LZ4_memcpy(&pattern[4], pattern, 4);
#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */
#  pragma warning(pop)
#endif
        break;
    case 4:
        /* four bytes doubled */
        LZ4_memcpy(pattern, srcPtr, 4);
        LZ4_memcpy(&pattern[4], srcPtr, 4);
        break;
    default:
        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
        return;
    }

    /* Stamp the pattern at least once, then until dstEnd is reached. */
    do {
        LZ4_memcpy(dstPtr, pattern, 8);
        dstPtr += 8;
    } while (dstPtr < dstEnd);
}
#endif


/*-************************************
*  Common functions
**************************************/
/* LZ4_NbCommonBytes() :
 * `val` is a non-zero word (asserted); LZ4_count() passes the XOR of two
 * words loaded from memory. The result is the number of bytes, counted in
 * memory order, that precede the first non-zero byte of `val`:
 *   - little-endian: number of zero low-order bytes  (trailing zero bits / 8)
 *   - big-endian   : number of zero high-order bytes (leading zero bits / 8)
 * Each width/endianness combination uses a compiler intrinsic when one is
 * available and a portable arithmetic fallback otherwise; defining
 * LZ4_FORCE_SW_BITCOUNT forces the fallback. */
static unsigned LZ4_NbCommonBytes (reg_t val)
{
    assert(val != 0);
    if (LZ4_isLittleEndian()) {
        if (sizeof(val) == 8) {
#       if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT)
/*-*************************************************************************************************
* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11.
* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics
* including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC.
****************************************************************************************************/
#         if defined(__clang__) && (__clang_major__ < 10)
            /* Avoid undefined clang-cl intrinsics issue.
             * See https://github.com/lz4/lz4/pull/1017 for details. */
            return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3;
#         else
            /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
            return (unsigned)_tzcnt_u64(val) >> 3;
#         endif
#       elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanForward64(&r, (U64)val);
            return (unsigned)r >> 3;
#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                                        !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_ctzll((U64)val) >> 3;
#       else
            /* Portable fallback: val ^ (val - 1) keeps the lowest set bit and
             * everything below it; the multiply-and-shift then yields the
             * byte count in the top byte. */
            const U64 m = 0x0101010101010101ULL;
            val ^= val - 1;
            return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
#       endif
        } else /* 32 bits */ {
#       if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r;
            _BitScanForward(&r, (U32)val);
            return (unsigned)r >> 3;
#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_ctz((U32)val) >> 3;
#       else
            /* Same multiply-and-shift idea as the 64-bit fallback. */
            const U32 m = 0x01010101;
            return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
#       endif
        }
    } else   /* Big Endian CPU */ {
        if (sizeof(val)==8) {
#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_clzll((U64)val) >> 3;
#       else
#if 1
            /* this method is probably faster,
             * but adds a 128 bytes lookup table */
            static const unsigned char ctz7_tab[128] = {
                7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
            };
            U64 const mask = 0x0101010101010101ULL;
            U64 const t = (((val >> 8) - mask) | val) & mask;
            return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
#else
            /* this method doesn't consume memory space like the previous one,
             * but it contains several branches,
             * that may end up slowing execution */
            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
            Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
            Note that this code path is never triggered in 32-bits mode. */
            unsigned r;
            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#endif
#       endif
        } else /* 32 bits */ {
#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                                        !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_clz((U32)val) >> 3;
#       else
            val >>= 8;
            val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
              (val + 0x00FF0000)) >> 24;
            return (unsigned)val ^ 3;
#       endif
        }
    }
}


#define STEPSIZE sizeof(reg_t)
LZ4_FORCE_INLINE
unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
{
    const BYTE* const pStart = pIn;

    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) {
            pIn+=STEPSIZE; pMatch+=STEPSIZE;
        } else {
            return LZ4_NbCommonBytes(diff);
    }   }

    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
        pIn += LZ4_NbCommonBytes(diff);
        return (unsigned)(pIn - pStart);
    }

    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (unsigned)(pIn - pStart);
}


/* Input sizes below this bound are what the compressor asserts when it is
 * run with the byU16 table type. */
static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
/* Shift applied to the match-search attempt counter: the search step is
 * (attempts >> LZ4_skipTrigger), with attempts starting at
 * (acceleration << LZ4_skipTrigger). */
static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */

/* Layout of the hash-table entries: raw pointers, 32-bit indexes or 16-bit
 * indexes. clearedTable is not a usable layout (the helpers assert on it). */
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
/* Where match candidates may live besides the current input block. */
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
/* dictSmall makes the compressor reject candidates whose index falls below
 * (startIndex - dictSize). */
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;


/* Prototypes of the decompression variants that take an explicit external
 * dictionary (start pointer + size). */
int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
                                     int compressedSize, int maxOutputSize,
                                     const void* dictStart, size_t dictSize);
int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
                                     int compressedSize, int targetOutputSize, int dstCapacity,
                                     const void* dictStart, size_t dictSize);

/* Hashes a 4-byte sequence by multiplying it with a 32-bit constant and
 * keeping the top bits of the product: LZ4_HASHLOG bits normally, one bit
 * more when the table holds 16-bit entries (byU16). */
LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
{
    U32 const product = sequence * 2654435761U;

    if (tableType != byU16) {
        return (product >> ((MINMATCH*8)-LZ4_HASHLOG));
    }
    return (product >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
}

/* Hashes a 64-bit word down to LZ4_HASHLOG bits (LZ4_HASHLOG+1 for byU16
 * tables). 24 bits of the word are shifted out before the multiplication;
 * the shift direction and the multiplier depend on the host byte order. */
LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
{
    U32 hashBits = LZ4_HASHLOG;
    U64 mixed;

    if (tableType == byU16) {
        hashBits = LZ4_HASHLOG+1;
    }

    if (LZ4_isLittleEndian()) {
        const U64 prime5bytes = 889523592379ULL;
        mixed = (sequence << 24) * prime5bytes;
    } else {
        const U64 prime8bytes = 11400714785074694791ULL;
        mixed = (sequence >> 24) * prime8bytes;
    }
    return (U32)(mixed >> (64 - hashBits));
}

/* Computes the hash-table slot for the bytes at `p`.
 * Uses the 64-bit-word hash when registers are 8 bytes wide and the table
 * is not byU16; otherwise falls back to the 4-byte hash. */
LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
{
    int const useWideHash = (sizeof(reg_t)==8) && (tableType != byU16);

    if (!useWideHash) {
        return LZ4_hash4(LZ4_read32(p), tableType);
    }
    return LZ4_hash5(LZ4_read_ARCH(p), tableType);
}

/* Resets slot `h` of the hash table to its empty value (NULL or 0),
 * interpreting the table according to `tableType`.
 * Any table type other than byPtr/byU32/byU16 is illegal and asserts. */
LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
{
    if (tableType == byPtr) {
        const BYTE** const slots = (const BYTE**)tableBase;
        slots[h] = NULL;
    } else if (tableType == byU32) {
        U32* const slots = (U32*) tableBase;
        slots[h] = 0;
    } else if (tableType == byU16) {
        U16* const slots = (U16*) tableBase;
        slots[h] = 0;
    } else {
        /* clearedTable or unknown value : illegal! */
        assert(0);
    }
}

/* Stores index `idx` in slot `h` of an index-based hash table.
 * Only byU32 and byU16 tables are valid here; with byU16 the index must
 * fit in 16 bits (asserted). Any other table type is illegal and asserts. */
LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
{
    if (tableType == byU32) {
        U32* const slots = (U32*) tableBase;
        slots[h] = idx;
        return;
    }
    if (tableType == byU16) {
        U16* const slots = (U16*) tableBase;
        assert(idx < 65536);
        slots[h] = (U16)idx;
        return;
    }
    /* clearedTable, byPtr or unknown value : illegal! */
    assert(0);
}

/* LZ4_putPosition*() family : pointer-based tables only (byPtr mode).
 * Records position `p` in slot `h` of the table. */
LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
                                  void* tableBase, tableType_t const tableType)
{
    assert(tableType == byPtr); (void)tableType;
    ((const BYTE**)tableBase)[h] = p;
}

/* Hashes the bytes at `p` and records `p` in the corresponding slot of a
 * pointer-based hash table. */
LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType)
{
    LZ4_putPositionOnHash(p, LZ4_hashPosition(p, tableType), tableBase, tableType);
}

/* LZ4_getIndexOnHash() :
 * Returns the match index stored in slot `h` of an index-based table.
 * The matching position is then base+index, or dictBase+index.
 * - valid only for tableType == byU32 or byU16 (anything else asserts
 *   and yields 0)
 * - `h` is presumed to lie within the bounds of the table
 */
LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
{
    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
    switch (tableType)
    {
    case byU32: {
        const U32* const slots = (const U32*) tableBase;
        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
        return slots[h];
    }
    case byU16: {
        const U16* const slots = (const U16*) tableBase;
        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
        return slots[h];
    }
    default:
        break;
    }
    assert(0); return 0;  /* forbidden case */
}

/* Returns the position stored in slot `h` of a pointer-based (byPtr)
 * hash table. */
static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType)
{
    const BYTE* const* const slots = (const BYTE* const*) tableBase;
    assert(tableType == byPtr); (void)tableType;
    return slots[h];
}

/* Hashes the bytes at `p` and returns the position currently stored in the
 * corresponding slot of a pointer-based hash table. */
LZ4_FORCE_INLINE const BYTE*
LZ4_getPosition(const BYTE* p,
                const void* tableBase, tableType_t tableType)
{
    return LZ4_getPositionOnHash(LZ4_hashPosition(p, tableType), tableBase, tableType);
}


/** LZ4_compress_generic_validated() :
 *  inlined, to ensure branches are decided at compilation time.
 *  The following conditions are presumed already validated:
 *  - source != NULL
 *  - inputSize > 0
 */
LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
                 LZ4_stream_t_internal* const cctx,
                 const char* const source,
                 char* const dest,
                 const int inputSize,
                 int*  inputConsumed, /* only written when outputDirective == fillOutput */
                 const int maxOutputSize,
                 const limitedOutput_directive outputDirective,
                 const tableType_t tableType,
                 const dict_directive dictDirective,
                 const dictIssue_directive dictIssue,
                 const int acceleration)
{
    int result;
    const BYTE* ip = (const BYTE*)source;

    U32 const startIndex = cctx->currentOffset;
    const BYTE* base = (const BYTE*)source - startIndex;
    const BYTE* lowLimit;

    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
    const BYTE* const dictionary =
        dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
    const U32 dictSize =
        dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
    const U32 dictDelta =
        (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with indexes in current context */

    int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
    U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
    const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
    const BYTE* anchor = (const BYTE*) source;
    const BYTE* const iend = ip + inputSize;
    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
    const BYTE* const matchlimit = iend - LASTLITERALS;

    /* the dictCtx currentOffset is indexed on the start of the dictionary,
     * while a dictionary in the current context precedes the currentOffset */
    const BYTE* dictBase = (dictionary == NULL) ? NULL :
                           (dictDirective == usingDictCtx) ?
                            dictionary + dictSize - dictCtx->currentOffset :
                            dictionary + dictSize - startIndex;

    BYTE* op = (BYTE*) dest;
    BYTE* const olimit = op + maxOutputSize;

    U32 offset = 0;
    U32 forwardH;

    DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
    assert(ip != NULL);
    if (tableType == byU16) assert(inputSize<LZ4_64Klimit);  /* Size too large (not within 64K limit) */
    if (tableType == byPtr) assert(dictDirective==noDict);   /* only supported use case with byPtr */
    /* If init conditions are not met, we don't have to mark stream
     * as having dirty context, since no action was taken yet */
    if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
    assert(acceleration >= 1);

    lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);

    /* Update context state */
    if (dictDirective == usingDictCtx) {
        /* Subsequent linked blocks can't use the dictionary. */
        /* Instead, they use the block we just compressed. */
        cctx->dictCtx = NULL;
        cctx->dictSize = (U32)inputSize;
    } else {
        cctx->dictSize += (U32)inputSize;
    }
    cctx->currentOffset += (U32)inputSize;
    cctx->tableType = (U32)tableType;

    if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */

    /* First Byte */
    {   U32 const h = LZ4_hashPosition(ip, tableType);
        if (tableType == byPtr) {
            LZ4_putPositionOnHash(ip, h, cctx->hashTable, byPtr);
        } else {
            LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, tableType);
    }   }
    ip++; forwardH = LZ4_hashPosition(ip, tableType);

    /* Main Loop */
    for ( ; ; ) {
        const BYTE* match;
        BYTE* token;
        const BYTE* filledIp;

        /* Find a match */
        if (tableType == byPtr) {
            const BYTE* forwardIp = ip;
            int step = 1;
            int searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
                assert(ip < mflimitPlusOne);

                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType);
                forwardH = LZ4_hashPosition(forwardIp, tableType);
                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType);

            } while ( (match+LZ4_DISTANCE_MAX < ip)
                   || (LZ4_read32(match) != LZ4_read32(ip)) );

        } else {   /* byU32, byU16 */

            const BYTE* forwardIp = ip;
            int step = 1;
            int searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                U32 const current = (U32)(forwardIp - base);
                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
                assert(matchIndex <= current);
                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
                assert(ip < mflimitPlusOne);

                if (dictDirective == usingDictCtx) {
                    if (matchIndex < startIndex) {
                        /* there was no match, try the dictionary */
                        assert(tableType == byU32);
                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                        match = dictBase + matchIndex;
                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
                        lowLimit = dictionary;
                    } else {
                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
                    }
                } else if (dictDirective == usingExtDict) {
                    if (matchIndex < startIndex) {
                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u  <  startIndex=%5u", matchIndex, startIndex);
                        assert(startIndex - matchIndex >= MINMATCH);
                        assert(dictBase);
                        match = dictBase + matchIndex;
                        lowLimit = dictionary;
                    } else {
                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
                    }
                } else {   /* single continuous memory segment */
                    match = base + matchIndex;
                }
                forwardH = LZ4_hashPosition(forwardIp, tableType);
                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);

                DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex, current - matchIndex);
                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }    /* match outside of valid area */
                assert(matchIndex < current);
                if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
                  && (matchIndex+LZ4_DISTANCE_MAX < current)) {
                    continue;
                } /* too far */
                assert((current - matchIndex) <= LZ4_DISTANCE_MAX);  /* match now expected within distance */

                if (LZ4_read32(match) == LZ4_read32(ip)) {
                    if (maybe_extMem) offset = current - matchIndex;
                    break;   /* match found */
                }

            } while(1);
        }

        /* Catch up */
        filledIp = ip;
        assert(ip > anchor); /* this is always true as ip has been advanced before entering the main loop */
        if ((match > lowLimit) && unlikely(ip[-1] == match[-1])) {
            do { ip--; match--; } while (((ip > anchor) & (match > lowLimit)) && (unlikely(ip[-1] == match[-1])));
        }

        /* Encode Literals */
        {   unsigned const litLength = (unsigned)(ip - anchor);
            token = op++;
            if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
            }
            if ((outputDirective == fillOutput) &&
                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
                op--;
                goto _last_literals;
            }
            if (litLength >= RUN_MASK) {
                int len = (int)(litLength - RUN_MASK);
                *token = (RUN_MASK<<ML_BITS);
                for(; len >= 255 ; len-=255) *op++ = 255;
                *op++ = (BYTE)len;
            }
            else *token = (BYTE)(litLength<<ML_BITS);

            /* Copy Literals */
            LZ4_wildCopy8(op, anchor, op+litLength);
            op+=litLength;
            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
        }

_next_match:
        /* at this stage, the following variables must be correctly set :
         * - ip : at start of LZ operation
         * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
         * - offset : if maybe_ext_memSegment==1 (constant)
         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
         * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
         */

        if ((outputDirective == fillOutput) &&
            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
            /* the match was too close to the end, rewind and go to last literals */
            op = token;
            goto _last_literals;
        }

        /* Encode Offset */
        if (maybe_extMem) {   /* static test */
            DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
            LZ4_writeLE16(op, (U16)offset); op+=2;
        } else  {
            DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
            assert(ip-match <= LZ4_DISTANCE_MAX);
            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
        }

        /* Encode MatchLength */
        {   unsigned matchCode;

            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
              && (lowLimit==dictionary) /* match within extDict */ ) {
                const BYTE* limit = ip + (dictEnd-match);
                assert(dictEnd > match);
                if (limit > matchlimit) limit = matchlimit;
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
                ip += (size_t)matchCode + MINMATCH;
                if (ip==limit) {
                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
                    matchCode += more;
                    ip += more;
                }
                DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
            } else {
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
                ip += (size_t)matchCode + MINMATCH;
                DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
            }

            if ((outputDirective) &&    /* Check output buffer overflow */
                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
                if (outputDirective == fillOutput) {
                    /* Match description too long : reduce it */
                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
                    ip -= matchCode - newMatchCode;
                    assert(newMatchCode < matchCode);
                    matchCode = newMatchCode;
                    if (unlikely(ip <= filledIp)) {
                        /* We have already filled up to filledIp so if ip ends up less than filledIp
                         * we have positions in the hash table beyond the current position. This is
                         * a problem if we reuse the hash table. So we have to remove these positions
                         * from the hash table.
                         */
                        const BYTE* ptr;
                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
                        for (ptr = ip; ptr <= filledIp; ++ptr) {
                            U32 const h = LZ4_hashPosition(ptr, tableType);
                            LZ4_clearHash(h, cctx->hashTable, tableType);
                        }
                    }
                } else {
                    assert(outputDirective == limitedOutput);
                    return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
                }
            }
            if (matchCode >= ML_MASK) {
                *token += ML_MASK;
                matchCode -= ML_MASK;
                LZ4_write32(op, 0xFFFFFFFF);
                while (matchCode >= 4*255) {
                    op+=4;
                    LZ4_write32(op, 0xFFFFFFFF);
                    matchCode -= 4*255;
                }
                op += matchCode / 255;
                *op++ = (BYTE)(matchCode % 255);
            } else
                *token += (BYTE)(matchCode);
        }
        /* Ensure we have enough space for the last literals. */
        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));

        anchor = ip;

        /* Test end of chunk */
        if (ip >= mflimitPlusOne) break;

        /* Fill table */
        {   U32 const h = LZ4_hashPosition(ip-2, tableType);
            if (tableType == byPtr) {
                LZ4_putPositionOnHash(ip-2, h, cctx->hashTable, byPtr);
            } else {
                U32 const idx = (U32)((ip-2) - base);
                LZ4_putIndexOnHash(idx, h, cctx->hashTable, tableType);
        }   }

        /* Test next position */
        if (tableType == byPtr) {

            match = LZ4_getPosition(ip, cctx->hashTable, tableType);
            LZ4_putPosition(ip, cctx->hashTable, tableType);
            if ( (match+LZ4_DISTANCE_MAX >= ip)
              && (LZ4_read32(match) == LZ4_read32(ip)) )
            { token=op++; *token=0; goto _next_match; }

        } else {   /* byU32, byU16 */

            U32 const h = LZ4_hashPosition(ip, tableType);
            U32 const current = (U32)(ip-base);
            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
            assert(matchIndex < current);
            if (dictDirective == usingDictCtx) {
                if (matchIndex < startIndex) {
                    /* there was no match, try the dictionary */
                    assert(tableType == byU32);
                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                    match = dictBase + matchIndex;
                    lowLimit = dictionary;   /* required for match length counter */
                    matchIndex += dictDelta;
                } else {
                    match = base + matchIndex;
                    lowLimit = (const BYTE*)source;  /* required for match length counter */
                }
            } else if (dictDirective==usingExtDict) {
                if (matchIndex < startIndex) {
                    assert(dictBase);
                    match = dictBase + matchIndex;
                    lowLimit = dictionary;   /* required for match length counter */
                } else {
                    match = base + matchIndex;
                    lowLimit = (const BYTE*)source;   /* required for match length counter */
                }
            } else {   /* single memory segment */
                match = base + matchIndex;
            }
            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
            assert(matchIndex < current);
            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
              && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
                token=op++;
                *token=0;
                if (maybe_extMem) offset = current - matchIndex;
                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
                goto _next_match;
            }
        }

        /* Prepare next loop */
        forwardH = LZ4_hashPosition(++ip, tableType);

    }

_last_literals:
    /* Encode Last Literals */
    {   size_t lastRun = (size_t)(iend - anchor);
        if ( (outputDirective) &&  /* Check output buffer overflow */
            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
            if (outputDirective == fillOutput) {
                /* adapt lastRun to fill 'dst' */
                assert(olimit >= op);
                lastRun  = (size_t)(olimit-op) - 1/*token*/;
                lastRun -= (lastRun + 256 - RUN_MASK) / 256;  /*additional length tokens*/
            } else {
                assert(outputDirective == limitedOutput);
                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
            }
        }
        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
        if (lastRun >= RUN_MASK) {
            size_t accumulator = lastRun - RUN_MASK;
            *op++ = RUN_MASK << ML_BITS;
            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
            *op++ = (BYTE) accumulator;
        } else {
            *op++ = (BYTE)(lastRun<<ML_BITS);
        }
        LZ4_memcpy(op, anchor, lastRun);
        ip = anchor + lastRun;
        op += lastRun;
    }

    if (outputDirective == fillOutput) {
        *inputConsumed = (int) (((const char*)ip)-source);
    }
    result = (int)(((char*)op) - dest);
    assert(result > 0);
    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
    return result;
}

/** LZ4_compress_generic() :
 *  Force-inlined front end, so that directive-dependent branches are
 *  settled at compilation time.
 *  - rejects a srcSize that is negative or larger than LZ4_MAX_INPUT_SIZE (returns 0)
 *  - handles the empty input itself (src may be NULL when srcSize == 0):
 *    writes a single 0 byte into dst and returns 1, or returns 0 when the
 *    output is limited and dstCapacity leaves no room for that byte
 *  - forwards every other input to LZ4_compress_generic_validated()
 *    and returns its result.
 *  @inputConsumed is only written when outputDirective == fillOutput. */
LZ4_FORCE_INLINE int LZ4_compress_generic(
                 LZ4_stream_t_internal* const cctx,
                 const char* const src,
                 char* const dst,
                 const int srcSize,
                 int *inputConsumed, /* only written when outputDirective == fillOutput */
                 const int dstCapacity,
                 const limitedOutput_directive outputDirective,
                 const tableType_t tableType,
                 const dict_directive dictDirective,
                 const dictIssue_directive dictIssue,
                 const int acceleration)
{
    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
                srcSize, dstCapacity);

    /* Unsupported srcSize : a negative value becomes huge once cast to U32 */
    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) {
        return 0;
    }

    /* Regular case : there is something to compress */
    if (srcSize != 0) {
        assert(src != NULL);
        return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
                    inputConsumed,
                    dstCapacity, outputDirective,
                    tableType, dictDirective, dictIssue, acceleration);
    }

    /* Empty input : the whole block is one zero token */
    if ((outputDirective != notLimited) && (dstCapacity <= 0)) {
        return 0;   /* no room for even one byte */
    }
    DEBUGLOG(5, "Generating an empty block");
    assert(outputDirective == notLimited || dstCapacity >= 1);
    assert(dst != NULL);
    dst[0] = 0;
    if (outputDirective == fillOutput) {
        assert (inputConsumed != NULL);
        *inputConsumed = 0;
    }
    return 1;
}


/* LZ4_createStream() :
 * Allocate one LZ4_stream_t with ALLOC() and initialise it through
 * LZ4_initStream().
 * @return : the new stream, or NULL when the allocation failed.
 * g_allocatedram is increased by sizeof(LZ4_stream_t) only once the
 * allocation is known to have succeeded, so a failed allocation is not
 * counted as allocated memory.
 * NOTE(review): g_allocatedram is presumably a running total of allocated
 * bytes maintained by the host program - confirm against its other users. */
LZ4_stream_t* LZ4_createStream(void)
{
    LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
    LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal));
    DEBUGLOG(4, "LZ4_createStream %p", lz4s);
    if (lz4s == NULL) return NULL;
    g_allocatedram+=sizeof(LZ4_stream_t);   /* account only for memory actually obtained */
    LZ4_initStream(lz4s, sizeof(*lz4s));
    return lz4s;
}

/* LZ4_stream_t_alignment() :
 * Alignment value that LZ4_initStream() checks its buffer against.
 * With LZ4_ALIGN_TEST enabled, it is measured as the size difference between
 * LZ4_stream_t and a struct that places a single char in front of one.
 * Otherwise it is 1, which makes the alignment check always pass. */
static size_t LZ4_stream_t_alignment(void)
{
#if LZ4_ALIGN_TEST
    typedef struct { char lead; LZ4_stream_t stream; } LZ4_alignProbe;
    size_t const required = sizeof(LZ4_alignProbe) - sizeof(LZ4_stream_t);
    return required;
#else
    size_t const required = 1;  /* effectively disabled */
    return required;
#endif
}

/* LZ4_initStream() :
 * Turn a caller-provided memory area into a zeroed LZ4 stream state.
 * @buffer : memory to initialise
 * @size   : number of bytes available at `buffer`
 * @return : `buffer` cast to LZ4_stream_t*, or NULL when `buffer` is NULL,
 *           when `size` is smaller than sizeof(LZ4_stream_t), or when
 *           `buffer` fails the LZ4_isAligned() check.
 * Only the first sizeof(LZ4_stream_t_internal) bytes are zeroed. */
LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
{
    int usable;
    DEBUGLOG(5, "LZ4_initStream");
    /* evaluated left to right : alignment is only tested on a non-NULL, large enough buffer */
    usable = (buffer != NULL)
          && (size >= sizeof(LZ4_stream_t))
          && LZ4_isAligned(buffer, LZ4_stream_t_alignment());
    if (!usable) {
        return NULL;
    }
    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
    return (LZ4_stream_t*)buffer;
}

/* LZ4_freeStream() :
 * Release a stream with FREEMEM().
 * A NULL argument is accepted and ignored.
 * @return : always 0. */
int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
{
    if (LZ4_stream != NULL) {
        DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
        FREEMEM(LZ4_stream);
    }
    return 0;
}


#define HASH_UNIT sizeof(reg_t)


/* LZ4_renormDictT() :
 * Rebase the stream's indexes when adding `nextSize` bytes would push
 * currentOffset beyond 0x80000000.
 * When that happens :
 * - every hash table entry is lowered by (currentOffset - 64 KB);
 *   entries smaller than that amount are reset to 0
 * - currentOffset restarts at 64 KB
 * - the dictionary is reduced to, at most, its last 64 KB.
 * Otherwise the stream is left untouched. */
static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
{
    assert(nextSize >= 0);

    /* still within range : nothing to rescale */
    if (LZ4_dict->currentOffset + (unsigned)nextSize <= 0x80000000) {
        return;
    }

    /* potential ptrdiff_t overflow (32-bits mode) : rescale hash table */
    {   U32 const shift = LZ4_dict->currentOffset - 64 KB;
        const BYTE* const previousDictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
        int slot;
        DEBUGLOG(4, "LZ4_renormDictT");
        for (slot = 0; slot < LZ4_HASH_SIZE_U32; slot++) {
            LZ4_dict->hashTable[slot] = (LZ4_dict->hashTable[slot] < shift)
                                      ? 0
                                      : LZ4_dict->hashTable[slot] - shift;
        }
        LZ4_dict->currentOffset = 64 KB;
        if (LZ4_dict->dictSize > 64 KB) {
            LZ4_dict->dictSize = 64 KB;
        }
        /* keep the dictionary anchored on its previous end */
        LZ4_dict->dictionary = previousDictEnd - LZ4_dict->dictSize;
    }
}


/* LZ4_compress_fast_continue() :
 * Compress `source` (inputSize bytes) into `dest` (maxOutputSize bytes),
 * using the history recorded in `LZ4_stream`.
 * Every path ends in a call to LZ4_compress_generic() with limitedOutput
 * and a byU32 table; the value returned is the value that call returns.
 *
 * Steps, in order :
 * 1. LZ4_renormDictT() rebases the indexes if they are about to overflow.
 * 2. `acceleration` below 1 becomes LZ4_ACCELERATION_DEFAULT;
 *    above LZ4_ACCELERATION_MAX it becomes LZ4_ACCELERATION_MAX.
 * 3. A dictionary shorter than 4 bytes is dropped (see conditions below).
 * 4. If the end of `source` falls inside the dictionary, the dictionary is
 *    trimmed to the part located after the end of `source`.
 * 5. If `source` starts exactly where the dictionary ends, prefix mode is used.
 *    Otherwise external dictionary mode is used, and `source`/`inputSize`
 *    are then recorded as the stream's dictionary.
 */
int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
                                const char* source, char* dest,
                                int inputSize, int maxOutputSize,
                                int acceleration)
{
    const tableType_t tableType = byU32;
    LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse;
    /* NULL when there is no dictionary, so that it can never compare equal to `source` below */
    const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL;

    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize);

    LZ4_renormDictT(streamPtr, inputSize);   /* fix index overflow */
    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;

    /* invalidate tiny dictionaries */
    if ( (streamPtr->dictSize < 4)     /* tiny dictionary : not enough for a hash */
      && (dictEnd != source)           /* not already in prefix mode */
      && (inputSize > 0)               /* tolerance : don't lose history, in case next invocation would use prefix mode */
      && (streamPtr->dictCtx == NULL)  /* not usingDictCtx */
      ) {
        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
        /* remove dictionary existence from history, to employ faster prefix mode */
        streamPtr->dictSize = 0;
        streamPtr->dictionary = (const BYTE*)source;
        dictEnd = source;
    }

    /* Check overlapping input/dictionary space */
    {   const char* const sourceEnd = source + inputSize;
        if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) {
            /* keep only the dictionary bytes located after the end of the input */
            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
            streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize;
        }
    }

    /* prefix mode : source data follows dictionary */
    if (dictEnd == source) {
        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
        else
            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
    }

    /* external dictionary mode */
    {   int result;
        if (streamPtr->dictCtx) {
            /* We depend here on the fact that dictCtx'es (produced by
             * LZ4_loadDict) guarantee that their tables contain no references
             * to offsets between dictCtx->currentOffset - 64 KB and
             * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
             * to use noDictIssue even when the dict isn't a full 64 KB.
             */
            if (inputSize > 4 KB) {
                /* For compressing large blobs, it is faster to pay the setup
                 * cost to copy the dictionary's tables into the active context,
                 * so that the compression loop is only looking into one table.
                 */
                LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
            } else {  /* small data <= 4 KB : look into the dictCtx tables directly */
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
            }
        } else {  /* no dictCtx : dictionary described by the stream itself */
            if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
            } else {
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
            }
        }
        /* the block just processed becomes the dictionary recorded in the stream */
        streamPtr->dictionary = (const BYTE*)source;
        streamPtr->dictSize = (U32)inputSize;
        return result;
    }
}

/* Decoding mode passed to LZ4_decompress_generic() as `partialDecoding` :
 * decode_full_block (0) requires the whole block to be decoded,
 * partial_decode (1) allows decoding to stop before the end of the block. */
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;

/* Smaller of two values.
 * Function-like macro : an argument may be evaluated twice, so do not pass
 * expressions with side effects. */
#undef MIN
#define MIN(a,b)    ( (a) < (b) ? (a) : (b) )



/* Read the extension bytes of a variable-length literal or match length.
 *
 * Bytes are consumed from *ip and added together; the sequence stops after
 * the first byte whose value is not 255. *ip is advanced past every byte read.
 *
 * @ip            : in/out read position
 * @ilimit        : read limit. If the read position ends up beyond it after
 *                  consuming a byte, the input is considered corrupted.
 * @initial_check : when non-zero, fail before reading anything if *ip >= ilimit
 * @return        : sum of the bytes read, or rvl_error when a limit is hit
 *                  or when the sum exceeds half the range of Rvl_t
 *                  (that last test is only active when Rvl_t is narrower than 8 bytes).
**/
typedef size_t Rvl_t;
static const Rvl_t rvl_error = (Rvl_t)(-1);
LZ4_FORCE_INLINE Rvl_t
read_variable_length(const BYTE** ip, const BYTE* ilimit,
                     int initial_check)
{
    Rvl_t const overflowThreshold = (Rvl_t)(-1) / 2;
    Rvl_t total = 0;
    Rvl_t current;
    assert(ip != NULL);
    assert(*ip !=  NULL);
    assert(ilimit != NULL);

    if (initial_check && unlikely((*ip) >= ilimit)) {
        return rvl_error;   /* read limit reached before the first byte */
    }

    for (;;) {
        current = *(*ip)++;   /* fetch one byte, then step the read position */
        total += current;
        if (unlikely((*ip) > ilimit)) {
            return rvl_error;   /* read limit reached */
        }
        /* accumulator overflow detection (32-bit mode only) */
        if ((sizeof(total) < 8) && unlikely(total > overflowThreshold)) {
            return rvl_error;
        }
        if (current != 255) {
            break;   /* a byte below 255 terminates the sequence */
        }
    }

    return total;
}

/*! LZ4_decompress_generic() :
 *  This generic decompression function covers all use cases.
 *  It shall be instantiated several times, using different sets of directives.
 *  Note that it is important for performance that this function really get inlined,
 *  in order to remove useless branches during compilation optimization.
 *
 *  Structure :
 *  - when LZ4_FAST_DEC_LOOP is enabled, a fast loop decodes sequences as long
 *    as at least FASTLOOP_SAFE_DISTANCE bytes of output space remain;
 *    it hands over to the safe loop through the safe_* labels
 *  - the safe loop decodes the remaining sequences with stricter bound checks.
 *
 * @return : number of bytes written into `dst` on success;
 *           -1 when `src` is NULL, `outputSize` is negative or `srcSize` is 0;
 *           otherwise, on malformed input, a negative value computed as
 *           -(input position where decoding stopped) - 1.
 */
LZ4_FORCE_INLINE int
LZ4_decompress_generic(
                 const char* const src,
                 char* const dst,
                 int srcSize,
                 int outputSize,         /* capacity of `dst`, in bytes; a negative value is rejected */

                 earlyEnd_directive partialDecoding,  /* full, partial */
                 dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
                 const size_t dictSize         /* note : = 0 if noDict */
                 )
{
    if ((src == NULL) || (outputSize < 0)) { return -1; }

    {   const BYTE* ip = (const BYTE*) src;
        const BYTE* const iend = ip + srcSize;

        BYTE* op = (BYTE*) dst;
        BYTE* const oend = op + outputSize;
        BYTE* cpy;

        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;

        /* match offsets are only validated when the dictionary is smaller than 64 KB */
        const int checkOffset = (dictSize < (int)(64 KB));


        /* Set up the "end" pointers for the shortcut. */
        const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
        const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;

        const BYTE* match;
        size_t offset;
        unsigned token;
        size_t length;


        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);

        /* Special cases */
        assert(lowPrefix <= op);
        if (unlikely(outputSize==0)) {
            /* Empty output buffer */
            if (partialDecoding) return 0;
            /* full decoding : only the 1-byte empty block (a single 0 token) is acceptable */
            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
        }
        if (unlikely(srcSize==0)) { return -1; }

    /* LZ4_FAST_DEC_LOOP:
     * designed for modern OoO performance cpus,
     * where copying reliably 32-bytes is preferable to an unpredictable branch.
     * note : fast loop may show a regression for some client arm chips. */
#if LZ4_FAST_DEC_LOOP
        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
            DEBUGLOG(6, "skip fast decode loop");
            goto safe_decode;
        }

        /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
        DEBUGLOG(6, "using fast decode loop");
        while (1) {
            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
            assert(ip < iend);
            token = *ip++;
            length = token >> ML_BITS;  /* literal length */

            /* decode literal length */
            if (length == RUN_MASK) {
                size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
                if (addl == rvl_error) {
                    DEBUGLOG(6, "error reading long literal length");
                    goto _output_error;
                }
                length += addl;
                if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
                if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */

                /* copy literals */
                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
                if ((op+length>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
                LZ4_wildCopy32(op, ip, op+length);
                ip += length; op += length;
            } else if (ip <= iend-(16 + 1/*max lit + offset + nextToken*/)) {
                /* We don't need to check oend, since we check it once for each loop below */
                DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
                /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */
                LZ4_memcpy(op, ip, 16);
                ip += length; op += length;
            } else {
                goto safe_literal_copy;
            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            DEBUGLOG(6, " offset = %zu", offset);
            match = op - offset;
            assert(match <= op);  /* overflow check */

            /* get matchlength */
            length = token & ML_MASK;

            if (length == ML_MASK) {
                size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
                if (addl == rvl_error) {
                    DEBUGLOG(6, "error reading long match length");
                    goto _output_error;
                }
                length += addl;
                length += MINMATCH;
                if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                    goto safe_match_copy;
                }
            } else {
                length += MINMATCH;
                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                    goto safe_match_copy;
                }

                /* Fastpath check: skip LZ4_wildCopy32 when true */
                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
                    if (offset >= 8) {
                        assert(match >= lowPrefix);
                        assert(match <= op);
                        assert(op + 18 <= oend);

                        /* always copy 18 bytes, whatever the match length; op only advances by `length` */
                        LZ4_memcpy(op, match, 8);
                        LZ4_memcpy(op+8, match+8, 8);
                        LZ4_memcpy(op+16, match+16, 2);
                        op += length;
                        continue;
            }   }   }

            if ( checkOffset && (unlikely(match + dictSize < lowPrefix)) ) {
                DEBUGLOG(6, "Error : pos=%zi, offset=%zi => outside buffers", op-lowPrefix, op-match);
                goto _output_error;
            }
            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
                assert(dictEnd != NULL);
                if (unlikely(op+length > oend-LASTLITERALS)) {
                    if (partialDecoding) {
                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
                        length = MIN(length, (size_t)(oend-op));
                    } else {
                        DEBUGLOG(6, "end-of-block condition violated")
                        goto _output_error;
                }   }

                if (length <= (size_t)(lowPrefix-match)) {
                    /* match fits entirely within external dictionary : just copy */
                    LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
                    /* match stretches into both external dictionary and current block */
                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) { *op++ = *copyFrom++; }
                    } else {
                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }

            /* copy match within block */
            cpy = op + length;

            assert((op <= oend) && (oend-op >= 32));
            if (unlikely(offset<16)) {
                LZ4_memcpy_using_offset(op, match, cpy, offset);
            } else {
                LZ4_wildCopy32(op, match, cpy);
            }

            op = cpy;   /* wildcopy correction */
        }
    safe_decode:
#endif

        /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
        DEBUGLOG(6, "using safe decode loop");
        while (1) {
            assert(ip < iend);
            token = *ip++;
            length = token >> ML_BITS;  /* literal length */

            /* A two-stage shortcut for the most common case:
             * 1) If the literal length is 0..14, and there is enough space,
             * enter the shortcut and copy 16 bytes on behalf of the literals
             * (in the fast mode, only 8 bytes can be safely copied this way).
             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
             * manner; but we ensure that there's enough space in the output for
             * those 18 bytes earlier, upon entering the shortcut (in other words,
             * there is a combined check for both stages).
             */
            if ( (length != RUN_MASK)
                /* strictly "less than" on input, to re-enter the loop with at least one byte */
              && likely((ip < shortiend) & (op <= shortoend)) ) {
                /* Copy the literals */
                LZ4_memcpy(op, ip, 16);
                op += length; ip += length;

                /* The second stage: prepare for match copying, decode full info.
                 * If it doesn't work out, the info won't be wasted. */
                length = token & ML_MASK; /* match length */
                offset = LZ4_readLE16(ip); ip += 2;
                match = op - offset;
                assert(match <= op); /* check overflow */

                /* Do not deal with overlapping matches. */
                if ( (length != ML_MASK)
                  && (offset >= 8)
                  && (dict==withPrefix64k || match >= lowPrefix) ) {
                    /* Copy the match. */
                    LZ4_memcpy(op + 0, match + 0, 8);
                    LZ4_memcpy(op + 8, match + 8, 8);
                    LZ4_memcpy(op +16, match +16, 2);
                    op += length + MINMATCH;
                    /* Both stages worked, load the next token. */
                    continue;
                }

                /* The second stage didn't work out, but the info is ready.
                 * Propel it right to the point of match copying. */
                goto _copy_match;
            }

            /* decode literal length */
            if (length == RUN_MASK) {
                size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
                if (addl == rvl_error) { goto _output_error; }
                length += addl;
                if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
                if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
            }

#if LZ4_FAST_DEC_LOOP
        safe_literal_copy:
#endif
            /* copy literals */
            cpy = op+length;

            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
            if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) {
                /* We've either hit the input parsing restriction or the output parsing restriction.
                 * In the normal scenario, decoding a full block, it must be the last sequence,
                 * otherwise it's an error (invalid input or dimensions).
                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
                 */
                if (partialDecoding) {
                    /* Since we are partial decoding we may be in this block because of the output parsing
                     * restriction, which is not valid since the output buffer is allowed to be undersized.
                     */
                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
                    /* Finishing in the middle of a literals segment,
                     * due to lack of input.
                     */
                    if (ip+length > iend) {
                        length = (size_t)(iend-ip);
                        cpy = op + length;
                    }
                    /* Finishing in the middle of a literals segment,
                     * due to lack of output space.
                     */
                    if (cpy > oend) {
                        cpy = oend;
                        assert(op<=oend);
                        length = (size_t)(oend-op);
                    }
                } else {
                     /* We must be on the last sequence (or invalid) because of the parsing limitations
                      * so check that we exactly consume the input and don't overrun the output buffer.
                      */
                    if ((ip+length != iend) || (cpy > oend)) {
                        DEBUGLOG(6, "should have been last run of literals")
                        DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
                        DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
                        goto _output_error;
                    }
                }
                LZ4_memmove(op, ip, length);  /* supports overlapping memory regions, for in-place decompression scenarios */
                ip += length;
                op += length;
                /* Necessarily EOF when !partialDecoding.
                 * When partialDecoding, it is EOF if we've either
                 * filled the output buffer or
                 * can't proceed with reading an offset for following match.
                 */
                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
                    break;
                }
            } else {
                LZ4_wildCopy8(op, ip, cpy);   /* can overwrite up to 8 bytes beyond cpy */
                ip += length; op = cpy;
            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            match = op - offset;

            /* get matchlength */
            length = token & ML_MASK;

    _copy_match:
            if (length == ML_MASK) {
                size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
                if (addl == rvl_error) { goto _output_error; }
                length += addl;
                if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
            }
            length += MINMATCH;

#if LZ4_FAST_DEC_LOOP
        safe_match_copy:
#endif
            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
                assert(dictEnd != NULL);
                if (unlikely(op+length > oend-LASTLITERALS)) {
                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
                    else goto _output_error;   /* doesn't respect parsing restriction */
                }

                if (length <= (size_t)(lowPrefix-match)) {
                    /* match fits entirely within external dictionary : just copy */
                    LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
                    /* match stretches into both external dictionary and current block */
                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) *op++ = *copyFrom++;
                    } else {
                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }
            assert(match >= lowPrefix);

            /* copy match within block */
            cpy = op + length;

            /* partialDecoding : may end anywhere within the block */
            assert(op<=oend);
            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                size_t const mlen = MIN(length, (size_t)(oend-op));
                const BYTE* const matchEnd = match + mlen;
                BYTE* const copyEnd = op + mlen;
                if (matchEnd > op) {   /* overlap copy */
                    while (op < copyEnd) { *op++ = *match++; }
                } else {
                    LZ4_memcpy(op, match, mlen);
                }
                op = copyEnd;
                if (op == oend) { break; }
                continue;
            }

            /* first 8 bytes of the match; offsets below 8 overlap the destination and are handled byte-wise */
            if (unlikely(offset<8)) {
                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
                op[0] = match[0];
                op[1] = match[1];
                op[2] = match[2];
                op[3] = match[3];
                match += inc32table[offset];
                LZ4_memcpy(op+4, match, 4);
                match -= dec64table[offset];
            } else {
                LZ4_memcpy(op, match, 8);
                match += 8;
            }
            op += 8;

            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
                if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
                if (op < oCopyLimit) {
                    LZ4_wildCopy8(op, match, oCopyLimit);
                    match += oCopyLimit - op;
                    op = oCopyLimit;
                }
                while (op < cpy) { *op++ = *match++; }
            } else {
                LZ4_memcpy(op, match, 8);
                if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
            }
            op = cpy;   /* wildcopy correction */
        }

        /* end of decoding */
        DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
        return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */

        /* Overflow error detected */
    _output_error:
        /* negative result : -(number of input bytes consumed so far) - 1 */
        return (int) (-(((const char*)ip)-src))-1;
    }
}

LZ4_FORCE_O2
int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
{
    /* Stand-alone block: dest itself is handed over as the prefix base,
     * and no external dictionary (NULL, size 0) is supplied. */
    BYTE* const prefixBase = (BYTE*)dest;

    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
                                  decode_full_block, noDict,
                                  prefixBase, NULL, 0);
}

LZ4_FORCE_O2 /* Exported, an obsolete API function. */
int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
{
    /* The prefix base is placed 64 KB in front of dest; no external dictionary. */
    BYTE* const prefixBase = (BYTE*)dest - 64 KB;

    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
                                  decode_full_block, withPrefix64k,
                                  prefixBase, NULL, 0);
}



LZ4_FORCE_O2
static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
                                               size_t prefixSize)
{
    /* The prefix base sits prefixSize bytes in front of dest; the generic
     * decoder is run in noDict mode with no external dictionary. */
    BYTE* const prefixBase = (BYTE*)dest - prefixSize;

    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
                                  decode_full_block, noDict,
                                  prefixBase, NULL, 0);
}


LZ4_FORCE_O2
int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
                                     int compressedSize, int maxOutputSize,
                                     const void* dictStart, size_t dictSize)
{
    /* No prefix in front of dest: all history comes from the external
     * dictionary described by dictStart / dictSize. */
    BYTE* const prefixBase = (BYTE*)dest;
    const BYTE* const extDict = (const BYTE*)dictStart;

    DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict");
    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
                                  decode_full_block, usingExtDict,
                                  prefixBase, extDict, dictSize);
}



/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
 * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
 * These routines are used only once, in LZ4_decompress_*_continue().
 */
LZ4_FORCE_INLINE
int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
                                   size_t prefixSize, const void* dictStart, size_t dictSize)
{
    /* First dictionary part: the prefixSize bytes directly in front of dest.
     * Second part: the external buffer dictStart / dictSize. */
    BYTE* const prefixBase = (BYTE*)dest - prefixSize;
    const BYTE* const extDict = (const BYTE*)dictStart;

    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
                                  decode_full_block, usingExtDict,
                                  prefixBase, extDict, dictSize);
}

/*===== streaming decompression functions =====*/

/* Allocate a zero-initialised streaming-decode state.
 * Returns the new state, or whatever ALLOC_AND_ZERO yields on failure
 * (assumed to be NULL, as with calloc -- TODO confirm against the macro).
 * g_allocatedram is increased only when the allocation succeeded, so a
 * failed request no longer inflates the memory accounting. */
LZ4_streamDecode_t* LZ4_createStreamDecode(void)
{
    LZ4_streamDecode_t* stream;

    LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal));
    stream = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
    if (stream != NULL) {
        g_allocatedram += sizeof(LZ4_streamDecode_t);
    }
    return stream;
}

/* Release a state obtained from LZ4_createStreamDecode().
 * A NULL handle is accepted and ignored. Always returns 0. */
int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
{
    if (LZ4_stream != NULL) {
        FREEMEM(LZ4_stream);
    }
    return 0;
}


/*
*_continue() :
    These decoding functions allow decompression of multiple blocks in "streaming" mode.
    Previously decoded blocks must still be available at the memory position where they were decoded.
    If it's not possible, save the relevant part of decoded data into a safe buffer,
    and indicate where it stands using LZ4_setStreamDecode()
*/
LZ4_FORCE_O2
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
{
    LZ4_streamDecode_t_internal* const state = &LZ4_streamDecode->internal_donotuse;
    BYTE* const out = (BYTE*)dest;
    int decoded;

    /* Case 1: nothing decoded so far, so there is no history to reference. */
    if (state->prefixSize == 0) {
        assert(state->extDictSize == 0);
        decoded = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
        if (decoded <= 0) { return decoded; }
        state->prefixSize = (size_t)decoded;
        state->prefixEnd = out + decoded;
        return decoded;
    }

    /* Case 2: dest does not continue the previous output (ring-buffer wrap or
     * a different buffer). The old prefix becomes the external dictionary;
     * this bookkeeping is updated before decoding, exactly as before. */
    if (state->prefixEnd != out) {
        state->extDictSize = state->prefixSize;
        state->externalDict = state->prefixEnd - state->extDictSize;
        decoded = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
                                                   state->externalDict, state->extDictSize);
        if (decoded <= 0) { return decoded; }
        state->prefixSize = (size_t)decoded;
        state->prefixEnd = out + decoded;
        return decoded;
    }

    /* Case 3: dest directly follows the previous output; pick the decoder
     * variant from the amount of history available. */
    if (state->prefixSize >= 64 KB - 1) {
        decoded = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
    } else if (state->extDictSize == 0) {
        decoded = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
                                                      state->prefixSize);
    } else {
        decoded = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
                                                 state->prefixSize, state->externalDict, state->extDictSize);
    }
    if (decoded <= 0) { return decoded; }
    state->prefixSize += (size_t)decoded;
    state->prefixEnd  += decoded;
    return decoded;
}

/// LICENSE_END.22
#endif




///https://patorjk.com/software/taag/#p=display&f=Graffiti&t=Type%20Something%20
///banner font
/// Print the large ASCII-art "GURU" banner through myprintf().
void bigguru()
{
	// Banner rows in print order; none of them contains a '%' conversion.
	static const char* const rows[] = {
		"\n\n",
		" #####  #     # ######  #     #\n",
		"#     # #     # #     # #     #\n",
		"#       #     # #     # #     #\n",
		"#  #### #     # ######  #     #\n",
		"#     # #     # #   #   #     #\n",
		"#     # #     # #    #  #     #\n",
		" #####   #####  #     #  ##### \n",
		"\n\n"
	};
	for (unsigned int i = 0; i < sizeof(rows) / sizeof(rows[0]); i++)
		myprintf(rows[i]);
}

/// Print the large ASCII-art "OK" banner through myprintf().
void bigok()
{
	// Banner rows in print order; none of them contains a '%' conversion.
	static const char* const rows[] = {
		"\n\n",
		"####### #    #\n",
		"#     # #   #\n",
		"#     # #  #\n",
		"#     # ###!\n",
		"#     # #  #\n",
		"#     # #   #\n",
		"####### #    #\n",
		"\n\n"
	};
	for (unsigned int i = 0; i < sizeof(rows) / sizeof(rows[0]); i++)
		myprintf(rows[i]);
}
/// Print the large ASCII-art "ERROR!" banner through myprintf().
void bigerror()
{
	// Banner rows in print order; none of them contains a '%' conversion.
	static const char* const rows[] = {
		"\n\n",
		"####### ######  ######  ####### ######  ###\n",
		"#       #     # #     # #     # #     # ###\n",
		"#       #     # #     # #     # #     # ###\n",
		"#####   ######  ######  #     # ######   #\n",
		"#       #   #   #   #   #     # #   #\n",
		"#       #    #  #    #  #     # #    #  ###\n",
		"####### #     # #     # ####### #     # ###\n",
		"\n\n"
	};
	for (unsigned int i = 0; i < sizeof(rows) / sizeof(rows[0]); i++)
		myprintf(rows[i]);
}
/// Print the large ASCII-art "WARNING" banner through myprintf().
void bigwarning()
{
	// Banner rows in print order; none of them contains a '%' conversion.
	static const char* const rows[] = {
		"\n\n",
		"#     #    #    ######  #     # ### #     #  #####\n",
		"#  #  #   # #   #     # ##    #  #  ##    # #     #\n",
		"#  #  #  #   #  #     # # #   #  #  # #   # #\n",
		"#  #  # #     # ######  #  #  #  #  #  #  # #  ####\n",
		"#  #  # ####### #   #   #   # #  #  #   # # #     #\n",
		"#  #  # #     # #    #  #    ##  #  #    ## #     #\n",
		" ## ##  #     # #     # #     # ### #     #  #####\n",
		"\n\n"
	};
	for (unsigned int i = 0; i < sizeof(rows) / sizeof(rows[0]); i++)
		myprintf(rows[i]);
}

#ifdef HWSHA2
// Execute the x86 CPUID instruction.
//   eax, ecx : values loaded into EAX / ECX before the instruction runs
//   o_a..o_d : receive EAX, EBX, ECX, EDX as left by the instruction
void getcpuid(uint32_t eax, uint32_t ecx, uint32_t& o_a,uint32_t& o_b,uint32_t& o_c,uint32_t& o_d)
{
    uint32_t ebx=0;
	uint32_t edx=0;
	// For 32-bit position-independent builds ebx is parked in edi around cpuid and
	// restored afterwards, so the instruction's EBX result is read from edi instead.
	// Presumably this is because the compiler reserves ebx in that configuration;
	// see the reference header below.
	// https://sites.uclouvain.be/SystInfo/usr/include/cpuid.h.html
#if defined(__i386__) && defined(__PIC__)
   __asm__ __volatile__ (	"movl %%ebx, %%edi;"
							"cpuid;"
							"xchgl %%ebx, %%edi;"
							: "=D"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx));
#else
	// Everywhere else all four registers are bound directly to the locals.
	__asm__("cpuid;" : "+b"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx));
#endif
	o_a=eax; // cpuid overwrites its input registers, so the new values are copied out
    o_b=ebx;
    o_c=ecx;
    o_d=edx;
}
#else
// Fallback used when HWSHA2 is not defined: CPUID is never executed and
// every output register is reported as 0. The inputs are ignored.
void getcpuid(uint32_t eax, uint32_t ecx, uint32_t& o_a,uint32_t& o_b,uint32_t& o_c,uint32_t& o_d)
{
	(void)eax;
	(void)ecx;
	o_a = o_b = o_c = o_d = 0;
}
#endif


#ifdef HWSHA2

/// LICENSE_START.20
/*
	This is a reworked
	CPU accelerated SHA code taken from SHA-Intrinsics - Public Domain
	(https://github.com/noloader/SHA-Intrinsics)
*/

/* Helper macros for the hashing code below; all rely on GCC/Clang builtins. */

/* Request m-byte alignment for a type or object. */
#define ALIGNED(m) __attribute__ ((__aligned__(m)))

/* Byte-order reversal of 64/32/16-bit values. */
#define bswap_uint64 __builtin_bswap64
#define bswap_uint32 __builtin_bswap32
#define bswap_uint16 __builtin_bswap16

/* Load a 32/64-bit word from p and reverse its byte order.
 * NOTE(review): the load is a plain pointer cast, so p is dereferenced as
 * uint32_t/uint64_t whatever its alignment -- fine on x86, confirm elsewhere. */
#define read_swap32(p) bswap_uint32(*(const uint32_t*)(const uint8_t*)(p))
#define read_swap64(p) bswap_uint64(*(const uint64_t*)(const uint8_t*)(p))

/* Issue read prefetches for address m and for m+32.
 * The argument is parenthesised so that expressions binding looser than '+'
 * (for example a ?: expression) still address the intended location. */
#define PREFETCH64(m) do { __builtin_prefetch((m), 0, 0); __builtin_prefetch((m)+32, 0, 0); } while(0)

/* Store v at p with its byte order reversed. The whole expansion is wrapped in
 * parentheses so it behaves as a single expression wherever it is used. */
#define write_swap16(p,v) ((*(uint16_t*)(void*)(p)) = bswap_uint16(v))
#define write_swap32(p,v) ((*(uint32_t*)(void*)(p)) = bswap_uint32(v))
#define write_swap64(p,v) ((*(uint64_t*)(void*)(p)) = bswap_uint64(v))

/* strlen() that yields 0 for a NULL pointer. str is evaluated twice. */
#define safe_strlen(str) ((((char*)(str))==NULL)?0:strlen(str))

/* Includes for SHA-1 and SHA-256 intrinsics */
#include <x86intrin.h>
/* Compile a single function for the given target feature list
 * (used below as "ssse3,sse4.1,sha" on the intrinsic-based transforms). */
#define MY_ENABLE_GCC_ARCH(arch) __attribute__ ((target (arch)))


/* Block size in bytes for each algorithm. Must be a power of 2:
 * the write routines derive the buffered byte count with "& (BLOCKSIZE - 1)". */
#define SHA1_BLOCKSIZE      64
#define SHA256_BLOCKSIZE    64
/* Size of the shared staging buffer in SUM_CONTEXT. */
#define MAX_BLOCKSIZE       SHA256_BLOCKSIZE


/*
 * Rotate 32 or 64 bit integers left (ROL) or right (ROR) by b bits.
 * Don't bother trying to hand-optimize those, as the
 * compiler usually does a pretty good job at that.
 * b must be strictly between 0 and the word width: with b == 0 the second
 * shift would be by the full width of the type.
 * The 32-bit forms expect a 32-bit unsigned operand, the 64-bit forms a
 * 64-bit unsigned operand.
 */
#define ROL32(a,b) (((a) << (b)) | ((a) >> (32-(b))))
#define ROR32(a,b) (((a) >> (b)) | ((a) << (32-(b))))
#define ROL64(a,b) (((a) << (b)) | ((a) >> (64-(b))))
#define ROR64(a,b) (((a) >> (b)) | ((a) << (64-(b))))

/*
 * SHA-256 common macros (use Wikipedia SHA-2 names for clarity)
 * Ch: bitwise choice -- each result bit is taken from y where x is 1 and from z where x is 0.
 * Ma: bitwise majority of x, y and z.
 */
#define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
#define Ma(x,y,z) (((x) & (y)) | ((z) & ((x) | (y))))

/* SHA-256 constants: one 32-bit value per round (64 rounds).
 * sha256_transform_cc() indexes this table by round number; the
 * intrinsic-based transform embeds the same values four at a time. */
static const uint32_t K256[64] = {
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};


/*
 * For convenience, we use a common context for all the checksum algorithms,
 * which means some elements may be unused...
 * Please note that the ALIGNED(64) attribute has been a source of trouble.
 *
 *  buf       : staging area for input that does not yet fill a whole block
 *  state     : chaining values; SHA-1 uses state[0..4], SHA-256 state[0..7].
 *              The transforms in this file read only the low 32 bits of each word.
 *  bytecount : total number of bytes passed to the write routine so far
 */
typedef struct ALIGNED(64) {
	uint8_t buf[MAX_BLOCKSIZE];
	uint64_t state[8];
	uint64_t bytecount;
} SUM_CONTEXT;


/* Reset ctx for a new SHA-1 computation: the whole context is zeroed,
 * then state[0..4] receive the SHA-1 initial chaining values. */
static void sha1_init(SUM_CONTEXT *ctx)
{
	static const uint32_t initial[5] = {
		0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
	};
	unsigned int word;

	memset(ctx, 0, sizeof(*ctx));
	for (word = 0; word < 5; word++)
		ctx->state[word] = initial[word];
}

/* Reset ctx for a new SHA-256 computation: the whole context is zeroed,
 * then state[0..7] receive the SHA-256 initial chaining values. */
static void sha256_init(SUM_CONTEXT *ctx)
{
	static const uint32_t initial[8] = {
		0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
		0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
	};
	unsigned int word;

	memset(ctx, 0, sizeof(*ctx));
	for (word = 0; word < 8; word++)
		ctx->state[word] = initial[word];
}

/*
 * Portable SHA-1 compression of one 64-byte block (16 32-bit words).
 *
 *  ctx  : context whose state[0..4] hold the chaining values; only their low
 *         32 bits are read, and the round results are added back into them.
 *  data : the 64 input bytes. Unless BIG is defined the words are byte-swapped
 *         on load; with BIG defined they are copied as-is.
 *
 * Every helper macro defined inside this function is #undef'ed before the
 * function ends, so short names such as M or K1 do not leak into the rest
 * of the file.
 */
static void sha1_transform_cc(SUM_CONTEXT *ctx, const uint8_t *data)
{
	uint32_t a, b, c, d, e, tm, x[16];

	/* get values from the chaining vars */
	a = (uint32_t)ctx->state[0];
	b = (uint32_t)ctx->state[1];
	c = (uint32_t)ctx->state[2];
	d = (uint32_t)ctx->state[3];
	e = (uint32_t)ctx->state[4];

#ifdef BIG
	memcpy(x, data, sizeof(x));
#else
	{

		unsigned k;
		for (k = 0; k < 16; k += 4) {
			const uint8_t *p2 = data + k * 4;
			x[k] = read_swap32(p2);
			x[k + 1] = read_swap32(p2 + 4);
			x[k + 2] = read_swap32(p2 + 8);
			x[k + 3] = read_swap32(p2 + 12);
		}
	}
#endif

/* Additive constant and boolean function for each group of 20 rounds */
#define K1  0x5a827999L
#define K2  0x6ed9eba1L
#define K3  0x8f1bbcdcL
#define K4  0xca62c1d6L
#define F1(x,y,z)   ( z ^ ( x & ( y ^ z ) ) )
#define F2(x,y,z)   ( x ^ y ^ z )
#define F3(x,y,z)   ( ( x & y ) | ( z & ( x | y ) ) )
#define F4(x,y,z)   ( x ^ y ^ z )

/* Message schedule for rounds 16..79, kept in the 16-word window x[] (index taken mod 16) */
#define M(i) ( tm = x[i&0x0f] ^ x[(i-14)&0x0f] ^ x[(i-8)&0x0f] ^ x[(i-3)&0x0f], (x[i&0x0f] = ROL32(tm,1)) )

/* One round. The caller rotates the roles of a..e between invocations instead of moving values. */
#define SHA1STEP(a, b, c, d, e, f, k, m) do { e += ROL32(a, 5) + f(b, c, d) + k + m; \
                                              b = ROL32(b, 30); } while(0)
	SHA1STEP(a, b, c, d, e, F1, K1, x[0]);
	SHA1STEP(e, a, b, c, d, F1, K1, x[1]);
	SHA1STEP(d, e, a, b, c, F1, K1, x[2]);
	SHA1STEP(c, d, e, a, b, F1, K1, x[3]);
	SHA1STEP(b, c, d, e, a, F1, K1, x[4]);
	SHA1STEP(a, b, c, d, e, F1, K1, x[5]);
	SHA1STEP(e, a, b, c, d, F1, K1, x[6]);
	SHA1STEP(d, e, a, b, c, F1, K1, x[7]);
	SHA1STEP(c, d, e, a, b, F1, K1, x[8]);
	SHA1STEP(b, c, d, e, a, F1, K1, x[9]);
	SHA1STEP(a, b, c, d, e, F1, K1, x[10]);
	SHA1STEP(e, a, b, c, d, F1, K1, x[11]);
	SHA1STEP(d, e, a, b, c, F1, K1, x[12]);
	SHA1STEP(c, d, e, a, b, F1, K1, x[13]);
	SHA1STEP(b, c, d, e, a, F1, K1, x[14]);
	SHA1STEP(a, b, c, d, e, F1, K1, x[15]);
	SHA1STEP(e, a, b, c, d, F1, K1, M(16));
	SHA1STEP(d, e, a, b, c, F1, K1, M(17));
	SHA1STEP(c, d, e, a, b, F1, K1, M(18));
	SHA1STEP(b, c, d, e, a, F1, K1, M(19));
	SHA1STEP(a, b, c, d, e, F2, K2, M(20));
	SHA1STEP(e, a, b, c, d, F2, K2, M(21));
	SHA1STEP(d, e, a, b, c, F2, K2, M(22));
	SHA1STEP(c, d, e, a, b, F2, K2, M(23));
	SHA1STEP(b, c, d, e, a, F2, K2, M(24));
	SHA1STEP(a, b, c, d, e, F2, K2, M(25));
	SHA1STEP(e, a, b, c, d, F2, K2, M(26));
	SHA1STEP(d, e, a, b, c, F2, K2, M(27));
	SHA1STEP(c, d, e, a, b, F2, K2, M(28));
	SHA1STEP(b, c, d, e, a, F2, K2, M(29));
	SHA1STEP(a, b, c, d, e, F2, K2, M(30));
	SHA1STEP(e, a, b, c, d, F2, K2, M(31));
	SHA1STEP(d, e, a, b, c, F2, K2, M(32));
	SHA1STEP(c, d, e, a, b, F2, K2, M(33));
	SHA1STEP(b, c, d, e, a, F2, K2, M(34));
	SHA1STEP(a, b, c, d, e, F2, K2, M(35));
	SHA1STEP(e, a, b, c, d, F2, K2, M(36));
	SHA1STEP(d, e, a, b, c, F2, K2, M(37));
	SHA1STEP(c, d, e, a, b, F2, K2, M(38));
	SHA1STEP(b, c, d, e, a, F2, K2, M(39));
	SHA1STEP(a, b, c, d, e, F3, K3, M(40));
	SHA1STEP(e, a, b, c, d, F3, K3, M(41));
	SHA1STEP(d, e, a, b, c, F3, K3, M(42));
	SHA1STEP(c, d, e, a, b, F3, K3, M(43));
	SHA1STEP(b, c, d, e, a, F3, K3, M(44));
	SHA1STEP(a, b, c, d, e, F3, K3, M(45));
	SHA1STEP(e, a, b, c, d, F3, K3, M(46));
	SHA1STEP(d, e, a, b, c, F3, K3, M(47));
	SHA1STEP(c, d, e, a, b, F3, K3, M(48));
	SHA1STEP(b, c, d, e, a, F3, K3, M(49));
	SHA1STEP(a, b, c, d, e, F3, K3, M(50));
	SHA1STEP(e, a, b, c, d, F3, K3, M(51));
	SHA1STEP(d, e, a, b, c, F3, K3, M(52));
	SHA1STEP(c, d, e, a, b, F3, K3, M(53));
	SHA1STEP(b, c, d, e, a, F3, K3, M(54));
	SHA1STEP(a, b, c, d, e, F3, K3, M(55));
	SHA1STEP(e, a, b, c, d, F3, K3, M(56));
	SHA1STEP(d, e, a, b, c, F3, K3, M(57));
	SHA1STEP(c, d, e, a, b, F3, K3, M(58));
	SHA1STEP(b, c, d, e, a, F3, K3, M(59));
	SHA1STEP(a, b, c, d, e, F4, K4, M(60));
	SHA1STEP(e, a, b, c, d, F4, K4, M(61));
	SHA1STEP(d, e, a, b, c, F4, K4, M(62));
	SHA1STEP(c, d, e, a, b, F4, K4, M(63));
	SHA1STEP(b, c, d, e, a, F4, K4, M(64));
	SHA1STEP(a, b, c, d, e, F4, K4, M(65));
	SHA1STEP(e, a, b, c, d, F4, K4, M(66));
	SHA1STEP(d, e, a, b, c, F4, K4, M(67));
	SHA1STEP(c, d, e, a, b, F4, K4, M(68));
	SHA1STEP(b, c, d, e, a, F4, K4, M(69));
	SHA1STEP(a, b, c, d, e, F4, K4, M(70));
	SHA1STEP(e, a, b, c, d, F4, K4, M(71));
	SHA1STEP(d, e, a, b, c, F4, K4, M(72));
	SHA1STEP(c, d, e, a, b, F4, K4, M(73));
	SHA1STEP(b, c, d, e, a, F4, K4, M(74));
	SHA1STEP(a, b, c, d, e, F4, K4, M(75));
	SHA1STEP(e, a, b, c, d, F4, K4, M(76));
	SHA1STEP(d, e, a, b, c, F4, K4, M(77));
	SHA1STEP(c, d, e, a, b, F4, K4, M(78));
	SHA1STEP(b, c, d, e, a, F4, K4, M(79));

/* Drop every function-local helper macro */
#undef SHA1STEP
#undef M
#undef F1
#undef F2
#undef F3
#undef F4
#undef K1
#undef K2
#undef K3
#undef K4

	/* Update chaining vars */
	ctx->state[0] += a;
	ctx->state[1] += b;
	ctx->state[2] += c;
	ctx->state[3] += d;
	ctx->state[4] += e;
}

/*
 * SHA-1 compression using the x86 SHA extension intrinsics.
 * The code is public domain taken from https://github.com/noloader/SHA-Intrinsics.
 *
 *  state64 : five chaining values; only the low 32 bits of each are read, and
 *            the updated 32-bit values are written back zero-extended.
 *  data    : input bytes, consumed 64 at a time.
 *  length  : number of bytes available at data. Every complete 64-byte block is
 *            processed; a trailing partial block is ignored.
 *
 * The function is compiled for "ssse3,sse4.1,sha" regardless of the global
 * target options, so it must only be called when the CPU supports them.
 */
MY_ENABLE_GCC_ARCH("ssse3,sse4.1,sha")
static void sha1_transform_x86(uint64_t state64[5], const uint8_t *data, size_t length)
{
	__m128i ABCD, E0, E1;
	__m128i MSG0, MSG1, MSG2, MSG3;

	/* Byte shuffle that reverses all 16 bytes of a loaded message vector */
	const __m128i MYMASK = _mm_set_epi64x(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);

	/* Narrow the 64-bit context words to the 32-bit layout the intrinsics use */
	uint32_t state[5] = {
		(uint32_t)state64[0],
		(uint32_t)state64[1],
		(uint32_t)state64[2],
		(uint32_t)state64[3],
		(uint32_t)state64[4]
	};

	/* Load initial values */
	ABCD = _mm_loadu_si128((const __m128i*) state);
	E0 = _mm_set_epi32(state[4], 0, 0, 0);
	ABCD = _mm_shuffle_epi32(ABCD, 0x1B);

	while (length >= SHA1_BLOCKSIZE)
	{
		/* Save current state  */
		const __m128i ABCD_SAVE = ABCD;
		const __m128i E0_SAVE = E0;

		/* Each group below performs four rounds; the last argument of
		 * _mm_sha1rnds4_epu32 (0..3) changes every 20 rounds. */

		/* Rounds 0-3 */
		MSG0 = _mm_loadu_si128((const __m128i*)(data + 0));
		MSG0 = _mm_shuffle_epi8(MSG0, MYMASK);
		E0 = _mm_add_epi32(E0, MSG0);
		E1 = ABCD;
		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);

		/* Rounds 4-7 */
		MSG1 = _mm_loadu_si128((const __m128i*)(data + 16));
		MSG1 = _mm_shuffle_epi8(MSG1, MYMASK);
		E1 = _mm_sha1nexte_epu32(E1, MSG1);
		E0 = ABCD;
		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
		MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);

		/* Rounds 8-11 */
		MSG2 = _mm_loadu_si128((const __m128i*)(data + 32));
		MSG2 = _mm_shuffle_epi8(MSG2, MYMASK);
		E0 = _mm_sha1nexte_epu32(E0, MSG2);
		E1 = ABCD;
		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
		MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
		MSG0 = _mm_xor_si128(MSG0, MSG2);

		/* Rounds 12-15 */
		MSG3 = _mm_loadu_si128((const __m128i*)(data + 48));
		MSG3 = _mm_shuffle_epi8(MSG3, MYMASK);
		E1 = _mm_sha1nexte_epu32(E1, MSG3);
		E0 = ABCD;
		MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
		MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
		MSG1 = _mm_xor_si128(MSG1, MSG3);

		/* Rounds 16-19 */
		E0 = _mm_sha1nexte_epu32(E0, MSG0);
		E1 = ABCD;
		MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
		MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
		MSG2 = _mm_xor_si128(MSG2, MSG0);

		/* Rounds 20-23 */
		E1 = _mm_sha1nexte_epu32(E1, MSG1);
		E0 = ABCD;
		MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
		MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
		MSG3 = _mm_xor_si128(MSG3, MSG1);

		/* Rounds 24-27 */
		E0 = _mm_sha1nexte_epu32(E0, MSG2);
		E1 = ABCD;
		MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
		MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
		MSG0 = _mm_xor_si128(MSG0, MSG2);

		/* Rounds 28-31 */
		E1 = _mm_sha1nexte_epu32(E1, MSG3);
		E0 = ABCD;
		MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
		MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
		MSG1 = _mm_xor_si128(MSG1, MSG3);

		/* Rounds 32-35 */
		E0 = _mm_sha1nexte_epu32(E0, MSG0);
		E1 = ABCD;
		MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
		MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
		MSG2 = _mm_xor_si128(MSG2, MSG0);

		/* Rounds 36-39 */
		E1 = _mm_sha1nexte_epu32(E1, MSG1);
		E0 = ABCD;
		MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
		MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
		MSG3 = _mm_xor_si128(MSG3, MSG1);

		/* Rounds 40-43 */
		E0 = _mm_sha1nexte_epu32(E0, MSG2);
		E1 = ABCD;
		MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
		MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
		MSG0 = _mm_xor_si128(MSG0, MSG2);

		/* Rounds 44-47 */
		E1 = _mm_sha1nexte_epu32(E1, MSG3);
		E0 = ABCD;
		MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
		MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
		MSG1 = _mm_xor_si128(MSG1, MSG3);

		/* Rounds 48-51 */
		E0 = _mm_sha1nexte_epu32(E0, MSG0);
		E1 = ABCD;
		MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
		MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
		MSG2 = _mm_xor_si128(MSG2, MSG0);

		/* Rounds 52-55 */
		E1 = _mm_sha1nexte_epu32(E1, MSG1);
		E0 = ABCD;
		MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
		MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
		MSG3 = _mm_xor_si128(MSG3, MSG1);

		/* Rounds 56-59 */
		E0 = _mm_sha1nexte_epu32(E0, MSG2);
		E1 = ABCD;
		MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
		MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
		MSG0 = _mm_xor_si128(MSG0, MSG2);

		/* Rounds 60-63 */
		E1 = _mm_sha1nexte_epu32(E1, MSG3);
		E0 = ABCD;
		MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
		MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
		MSG1 = _mm_xor_si128(MSG1, MSG3);

		/* Rounds 64-67 */
		E0 = _mm_sha1nexte_epu32(E0, MSG0);
		E1 = ABCD;
		MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);
		MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
		MSG2 = _mm_xor_si128(MSG2, MSG0);

		/* Rounds 68-71 */
		E1 = _mm_sha1nexte_epu32(E1, MSG1);
		E0 = ABCD;
		MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
		MSG3 = _mm_xor_si128(MSG3, MSG1);

		/* Rounds 72-75 */
		E0 = _mm_sha1nexte_epu32(E0, MSG2);
		E1 = ABCD;
		MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);

		/* Rounds 76-79 */
		E1 = _mm_sha1nexte_epu32(E1, MSG3);
		E0 = ABCD;
		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);

		/* Combine state */
		E0 = _mm_sha1nexte_epu32(E0, E0_SAVE);
		ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);

		/* Advance to the next 64-byte block */
		data += 64;
		length -= 64;
	}

	/* Save state */
	ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
	_mm_storeu_si128((__m128i*) state, ABCD);
	state[4] = _mm_extract_epi32(E0, 3);

	/* Repack into uint64_t. */
	state64[0] = state[0];
	state64[1] = state[1];
	state64[2] = state[2];
	state64[3] = state[3];
	state64[4] = state[4];
}

/* Compress one 64-byte block into the SHA-1 state of ctx, using the
 * intrinsic-based routine when flaghw is set and the portable one otherwise. */
static void sha1_transform(SUM_CONTEXT *ctx, const uint8_t *data)
{
	if (!flaghw) {
		sha1_transform_cc(ctx, data);
		return;
	}
	sha1_transform_x86(ctx->state, data, SHA1_BLOCKSIZE);
}

/*
 * Portable SHA-256 compression of one 64-byte block (16 32-bit words).
 *
 *  ctx  : context whose state[0..7] hold the chaining values; only their low
 *         32 bits are read, and the round results are added back into them.
 *  data : the 64 input bytes. Unless BIG is defined the words are byte-swapped
 *         on load; with BIG defined they are copied as-is.
 *
 * All helper macros defined inside the function are #undef'ed before it ends.
 */
static __inline void sha256_transform_cc(SUM_CONTEXT *ctx, const uint8_t *data)
{
	uint32_t a, b, c, d, e, f, g, h, j, x[16];

	a = (uint32_t)ctx->state[0];
	b = (uint32_t)ctx->state[1];
	c = (uint32_t)ctx->state[2];
	d = (uint32_t)ctx->state[3];
	e = (uint32_t)ctx->state[4];
	f = (uint32_t)ctx->state[5];
	g = (uint32_t)ctx->state[6];
	h = (uint32_t)ctx->state[7];

// Nesting the ROR allows for single register compiler optimizations.
// Expanded, the nested forms are the usual rotate/shift combinations:
//   S0 = ROR 2 ^ ROR 13 ^ ROR 22      S1 = ROR 6 ^ ROR 11 ^ ROR 25
//   s0 = ROR 7 ^ ROR 18 ^ SHR 3       s1 = ROR 17 ^ ROR 19 ^ SHR 10
#define S0(x) (ROR32(ROR32(ROR32(x,9)^(x),11)^(x),2))	// Sigma 0 (upper case)
#define S1(x) (ROR32(ROR32(ROR32(x,14)^(x),5)^(x),6))	// Sigma 1 (upper case)
#define s0(x) (ROR32(ROR32(x,11)^(x),7)^((x)>>3))		// sigma 0 (lower case)
#define s1(x) (ROR32(ROR32(x,2)^(x),17)^((x)>>10))		// sigma 1 (lower case)
// BLK0 reads a message word as loaded; BLK2 extends the schedule in place
// inside the 16-word window x[] (indices taken mod 16).
#define BLK0(i) (x[i])
#define BLK2(i) (x[i] += s1(x[((i)-2)&15]) + x[((i)-7)&15] + s0(x[((i)-15)&15]))
// One round. j is the loop counter below (0, 16, 32, 48): while j is 0 the
// loaded words are used directly, afterwards the schedule is extended first.
// The caller rotates the roles of a..h between invocations.
#define R(a, b, c, d, e, f, g, h, i) \
	h += S1(e) + Ch(e,f,g) + K256[(i)+(j)] + (j ? BLK2(i) : BLK0(i)); \
	d += h; \
	h += S0(a) + Ma(a, b, c)
// Eight consecutive rounds starting at window index i.
#define RX_8(i) \
	R(a, b, c, d, e, f, g, h, i);   \
	R(h, a, b, c, d, e, f, g, i+1); \
	R(g, h, a, b, c, d, e, f, i+2); \
	R(f, g, h, a, b, c, d, e, i+3); \
	R(e, f, g, h, a, b, c, d, i+4); \
	R(d, e, f, g, h, a, b, c, i+5); \
	R(c, d, e, f, g, h, a, b, i+6); \
	R(b, c, d, e, f, g, h, a, i+7)

#ifdef BIG
	memcpy(x, data, sizeof(x));
#else
	{
		uint32_t k;
		for (k = 0; k < 16; k += 4) {
			const uint8_t* p2 = data + k * 4;
			x[k] = read_swap32(p2);
			x[k + 1] = read_swap32(p2 + 4);
			x[k + 2] = read_swap32(p2 + 8);
			x[k + 3] = read_swap32(p2 + 12);
		}
	}
#endif

	// 4 passes of 16 rounds each = 64 rounds
	for (j = 0; j < 64; j += 16) {
		RX_8(0);
		RX_8(8);
	}

#undef S0
#undef S1
#undef s0
#undef s1
#undef BLK0
#undef BLK2
#undef R
#undef RX_8

	// Add the round results back into the chaining values
	ctx->state[0] += a;
	ctx->state[1] += b;
	ctx->state[2] += c;
	ctx->state[3] += d;
	ctx->state[4] += e;
	ctx->state[5] += f;
	ctx->state[6] += g;
	ctx->state[7] += h;
}

/*
 * SHA-256 compression using the x86 SHA extension intrinsics.
 * The code is public domain taken from https://github.com/noloader/SHA-Intrinsics.
 *
 *  state64 : eight chaining values; only the low 32 bits of each are read, and
 *            the updated 32-bit values are written back zero-extended.
 *  data    : input bytes, consumed 64 at a time.
 *  length  : number of bytes available at data. Every complete 64-byte block is
 *            processed; a trailing partial block is ignored.
 *
 * The function is compiled for "ssse3,sse4.1,sha" regardless of the global
 * target options, so it must only be called when the CPU supports them.
 * The 64-bit literals passed to _mm_set_epi64x below are the K256 round
 * constants, packed four per vector.
 */
MY_ENABLE_GCC_ARCH("ssse3,sse4.1,sha")
static __inline void sha256_transform_x86(uint64_t state64[8], const uint8_t *data, size_t length)
{
	__m128i STATE0, STATE1;
	__m128i MSG, TMP;
	__m128i MSG0, MSG1, MSG2, MSG3;
	/* Byte shuffle that reverses the bytes inside each 32-bit lane */
	const __m128i MYMASK = _mm_set_epi64x(0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL);

	/* Narrow the 64-bit context words to the 32-bit layout the intrinsics use */
	uint32_t state[8] = {
		(uint32_t)state64[0],
		(uint32_t)state64[1],
		(uint32_t)state64[2],
		(uint32_t)state64[3],
		(uint32_t)state64[4],
		(uint32_t)state64[5],
		(uint32_t)state64[6],
		(uint32_t)state64[7]
	};

	/* Load initial values */
	TMP = _mm_loadu_si128((const __m128i*) (state+0));
	STATE1 = _mm_loadu_si128((const __m128i*) (state+4));

	TMP = _mm_shuffle_epi32(TMP, 0xB1);          /* CDAB */
	STATE1 = _mm_shuffle_epi32(STATE1, 0x1B);    /* EFGH */
	STATE0 = _mm_alignr_epi8(TMP, STATE1, 8);    /* ABEF */
	STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); /* CDGH */

	while (length >= SHA256_BLOCKSIZE)
	{
		/* Save current state */
		const __m128i ABEF_SAVE = STATE0;
		const __m128i CDGH_SAVE = STATE1;

		/* Each group below performs four rounds as two _mm_sha256rnds2_epu32 calls */

		/* Rounds 0-3 */
		MSG = _mm_loadu_si128((const __m128i*) (data+0));
		MSG0 = _mm_shuffle_epi8(MSG, MYMASK);
		MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0xE9B5DBA5B5C0FBCFULL, 0x71374491428A2F98ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);

		/* Rounds 4-7 */
		MSG1 = _mm_loadu_si128((const __m128i*) (data+16));
		MSG1 = _mm_shuffle_epi8(MSG1, MYMASK);
		MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0xAB1C5ED5923F82A4ULL, 0x59F111F13956C25BULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1);

		/* Rounds 8-11 */
		MSG2 = _mm_loadu_si128((const __m128i*) (data+32));
		MSG2 = _mm_shuffle_epi8(MSG2, MYMASK);
		MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0x550C7DC3243185BEULL, 0x12835B01D807AA98ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2);

		/* Rounds 12-15 */
		MSG3 = _mm_loadu_si128((const __m128i*) (data+48));
		MSG3 = _mm_shuffle_epi8(MSG3, MYMASK);
		MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0xC19BF1749BDC06A7ULL, 0x80DEB1FE72BE5D74ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG3, MSG2, 4);
		MSG0 = _mm_add_epi32(MSG0, TMP);
		MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3);

		/* Rounds 16-19 */
		MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x240CA1CC0FC19DC6ULL, 0xEFBE4786E49B69C1ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG0, MSG3, 4);
		MSG1 = _mm_add_epi32(MSG1, TMP);
		MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0);

		/* Rounds 20-23 */
		MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x76F988DA5CB0A9DCULL, 0x4A7484AA2DE92C6FULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG1, MSG0, 4);
		MSG2 = _mm_add_epi32(MSG2, TMP);
		MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1);

		/* Rounds 24-27 */
		MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0xBF597FC7B00327C8ULL, 0xA831C66D983E5152ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG2, MSG1, 4);
		MSG3 = _mm_add_epi32(MSG3, TMP);
		MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2);

		/* Rounds 28-31 */
		MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0x1429296706CA6351ULL,  0xD5A79147C6E00BF3ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG3, MSG2, 4);
		MSG0 = _mm_add_epi32(MSG0, TMP);
		MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3);

		/* Rounds 32-35 */
		MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x53380D134D2C6DFCULL, 0x2E1B213827B70A85ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG0, MSG3, 4);
		MSG1 = _mm_add_epi32(MSG1, TMP);
		MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0);

		/* Rounds 36-39 */
		MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x92722C8581C2C92EULL, 0x766A0ABB650A7354ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG1, MSG0, 4);
		MSG2 = _mm_add_epi32(MSG2, TMP);
		MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1);

		/* Rounds 40-43 */
		MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0xC76C51A3C24B8B70ULL, 0xA81A664BA2BFE8A1ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG2, MSG1, 4);
		MSG3 = _mm_add_epi32(MSG3, TMP);
		MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2);

		/* Rounds 44-47 */
		MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0x106AA070F40E3585ULL, 0xD6990624D192E819ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG3, MSG2, 4);
		MSG0 = _mm_add_epi32(MSG0, TMP);
		MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3);

		/* Rounds 48-51 */
		MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x34B0BCB52748774CULL, 0x1E376C0819A4C116ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG0, MSG3, 4);
		MSG1 = _mm_add_epi32(MSG1, TMP);
		MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
		MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0);

		/* Rounds 52-55 */
		MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x682E6FF35B9CCA4FULL, 0x4ED8AA4A391C0CB3ULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG1, MSG0, 4);
		MSG2 = _mm_add_epi32(MSG2, TMP);
		MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);

		/* Rounds 56-59 */
		MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0x8CC7020884C87814ULL, 0x78A5636F748F82EEULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		TMP = _mm_alignr_epi8(MSG2, MSG1, 4);
		MSG3 = _mm_add_epi32(MSG3, TMP);
		MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);

		/* Rounds 60-63 */
		MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0xC67178F2BEF9A3F7ULL, 0xA4506CEB90BEFFFAULL));
		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
		MSG = _mm_shuffle_epi32(MSG, 0x0E);
		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);

		/* Combine state  */
		STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
		STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);

		/* Advance to the next 64-byte block */
		data += 64;
		length -= 64;
	}

	/* Undo the ABEF/CDGH interleaving so the words can be stored in A..H order */
	TMP = _mm_shuffle_epi32(STATE0, 0x1B);       /* FEBA */
	STATE1 = _mm_shuffle_epi32(STATE1, 0xB1);    /* DCHG */
	STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); /* DCBA */
	STATE1 = _mm_alignr_epi8(STATE1, TMP, 8);    /* HGFE */

	/* Save state */
	_mm_storeu_si128((__m128i*) (state+0), STATE0);
	_mm_storeu_si128((__m128i*) (state+4), STATE1);

	/* Repack into uint64_t. */
	state64[0] = state[0];
	state64[1] = state[1];
	state64[2] = state[2];
	state64[3] = state[3];
	state64[4] = state[4];
	state64[5] = state[5];
	state64[6] = state[6];
	state64[7] = state[7];
}

/* Hash one SHA256_BLOCKSIZE-byte block at data into ctx: the intrinsics
   routine is used when flaghw is set, the _cc routine otherwise. */
static __inline void sha256_transform(SUM_CONTEXT *ctx, const uint8_t *data)
{
	if (!flaghw) {
		sha256_transform_cc(ctx, data);
		return;
	}
	sha256_transform_x86(ctx->state, data, SHA256_BLOCKSIZE);
}



/* Feed len bytes from buf into the running SHA-1 computation in ctx.
   Whole blocks are hashed immediately; a trailing partial block is kept in
   ctx->buf until a later call (or sha1_final) completes it. */
static void sha1_write(SUM_CONTEXT *ctx, const uint8_t *buf, size_t len)
{
	const size_t buffered = ctx->bytecount & (SHA1_BLOCKSIZE - 1);

	/* Account for the new input before consuming it */
	ctx->bytecount += len;

	/* Top up a partially filled block left by an earlier call */
	if (buffered != 0) {
		const size_t room = SHA1_BLOCKSIZE - buffered;

		if (len < room) {
			/* Still short of a whole block: stash and wait for more */
			memcpy(ctx->buf + buffered, buf, len);
			return;
		}
		memcpy(ctx->buf + buffered, buf, room);
		sha1_transform(ctx, ctx->buf);
		buf += room;
		len -= room;
	}

	if (!flaghw) {
		/* One block per transform call */
		for (; len >= SHA1_BLOCKSIZE; buf += SHA1_BLOCKSIZE, len -= SHA1_BLOCKSIZE) {
			PREFETCH64(buf + SHA1_BLOCKSIZE);
			sha1_transform(ctx, buf);
		}
	} else if (len >= SHA1_BLOCKSIZE) {
		/* Hand every whole block to the intrinsics routine in a single call */
		const size_t whole = len - (len % SHA1_BLOCKSIZE);

		sha1_transform_x86(ctx->state, buf, whole);
		buf += whole;
		len -= whole;
	}

	/* Keep the tail (fewer than SHA1_BLOCKSIZE bytes) for next time */
	memcpy(ctx->buf, buf, len);
}

/* Feed len bytes from buf into the running SHA-256 computation in ctx.
   Whole blocks are hashed immediately; a trailing partial block is kept in
   ctx->buf until a later call (or sha256_final) completes it. */
static void sha256_write(SUM_CONTEXT *ctx, const uint8_t *buf, size_t len)
{
	const size_t buffered = ctx->bytecount & (SHA256_BLOCKSIZE - 1);

	/* Account for the new input before consuming it */
	ctx->bytecount += len;

	/* Top up a partially filled block left by an earlier call */
	if (buffered != 0) {
		const size_t room = SHA256_BLOCKSIZE - buffered;

		if (len < room) {
			/* Still short of a whole block: stash and wait for more */
			memcpy(ctx->buf + buffered, buf, len);
			return;
		}
		memcpy(ctx->buf + buffered, buf, room);
		sha256_transform(ctx, ctx->buf);
		buf += room;
		len -= room;
	}

	if (!flaghw) {
		/* One block per transform call */
		for (; len >= SHA256_BLOCKSIZE; buf += SHA256_BLOCKSIZE, len -= SHA256_BLOCKSIZE) {
			PREFETCH64(buf + SHA256_BLOCKSIZE);
			sha256_transform(ctx, buf);
		}
	} else if (len >= SHA256_BLOCKSIZE) {
		/* Hand every whole block to the intrinsics routine in a single call */
		const size_t whole = len - (len % SHA256_BLOCKSIZE);

		sha256_transform_x86(ctx->state, buf, whole);
		buf += whole;
		len -= whole;
	}

	/* Keep the tail (fewer than SHA256_BLOCKSIZE bytes) for next time */
	memcpy(ctx->buf, buf, len);
}

/* Finalize the computation (SHA-1): append the 0x80 marker, zero padding and
   the 64-bit big-endian bit length, hash the last block(s), then store the
   20-byte digest, most significant byte first, at the start of ctx->buf.
   Note that the digest is delivered in ctx->buf, not in ctx->state[]. */
static void sha1_final(SUM_CONTEXT *ctx)
{
	size_t pos = ((size_t)ctx->bytecount) & (SHA1_BLOCKSIZE - 1);
	uint64_t bitcount = ctx->bytecount << 3;
	uint8_t *p;
	unsigned i;

	ctx->buf[pos++] = 0x80;

	/* Pad whatever data is left in the buffer. If the marker left no room
	   for the 8 length bytes, the loop wraps: the full block is hashed and
	   padding continues from offset 0 of a fresh block. */
	while (pos != (SHA1_BLOCKSIZE - sizeof(uint64_t))) {
		pos &= (SHA1_BLOCKSIZE - 1);
		if (pos == 0)
			sha1_transform(ctx, ctx->buf);
		ctx->buf[pos++] = 0;
	}

	/* Append the total message length in bits (big-endian) and transform */
	for (i = 0; i < 8; i++)
		ctx->buf[SHA1_BLOCKSIZE - 1 - i] = (uint8_t) (bitcount >> (8 * i));

	sha1_transform(ctx, ctx->buf);

	/* Serialize the five 32-bit state words into ctx->buf. A plain loop
	   replaces the former X() macro, whose non-BIG variant ended in
	   "while(0);" and whose BIG variant stored through a uint32_t* cast. */
	p = ctx->buf;
	for (i = 0; i < 5; i++, p += 4) {
		const uint32_t word = (uint32_t)ctx->state[i];
#ifdef BIG
		/* Native order is already big-endian; memcpy avoids type punning */
		memcpy(p, &word, sizeof(word));
#else
		write_swap32(p, word);
#endif
	}
}

/* Finalize the computation (SHA-256): append the 0x80 marker, zero padding
   and the 64-bit big-endian bit length, hash the last block(s), then store
   the 32-byte digest, most significant byte first, at the start of ctx->buf.
   Note that the digest is delivered in ctx->buf, not in ctx->state[]. */
static void sha256_final(SUM_CONTEXT *ctx)
{
	size_t pos = ((size_t)ctx->bytecount) & (SHA256_BLOCKSIZE - 1);
	uint64_t bitcount = ctx->bytecount << 3;
	uint8_t *p;
	unsigned i;

	ctx->buf[pos++] = 0x80;

	/* Pad whatever data is left in the buffer. If the marker left no room
	   for the 8 length bytes, the loop wraps: the full block is hashed and
	   padding continues from offset 0 of a fresh block. */
	while (pos != (SHA256_BLOCKSIZE - sizeof(uint64_t))) {
		pos &= (SHA256_BLOCKSIZE - 1);
		if (pos == 0)
			sha256_transform(ctx, ctx->buf);
		ctx->buf[pos++] = 0;
	}

	/* Append the total message length in bits (big-endian) and transform */
	for (i = 0; i < 8; i++)
		ctx->buf[SHA256_BLOCKSIZE - 1 - i] = (uint8_t) (bitcount >> (8 * i));

	sha256_transform(ctx, ctx->buf);

	/* Serialize the eight 32-bit state words into ctx->buf. A plain loop
	   replaces the former X() macro, whose non-BIG variant ended in
	   "while(0);" and whose BIG variant stored through a uint32_t* cast. */
	p = ctx->buf;
	for (i = 0; i < 8; i++, p += 4) {
		const uint32_t word = (uint32_t)ctx->state[i];
#ifdef BIG
		/* Native order is already big-endian; memcpy avoids type punning */
		memcpy(p, &word, sizeof(word));
#else
		write_swap32(p, word);
#endif
	}
}
#endif

/// LICENSE_END.20




/// LICENSE_START.5
#ifdef HWSHA1
/*
	This is SHA1 via linked asm (Windows only)
*/
/* Attribute wrappers (GCC-style __attribute__ syntax) used by the code below */
#define MY_ALIGN(n) __attribute__ ((aligned(n)))
#define MY_NO_INLINE __attribute__((noinline))
/* Expands to nothing: no explicit calling convention is requested */
#define MY_FAST_CALL
/* Short integer aliases used by this SHA-1 section */
typedef unsigned char 			Byte;
typedef short 					Int16;
typedef int 					Int32;
typedef long long int 			Int64;
typedef unsigned short 			UInt16;
typedef unsigned int 			UInt32;
typedef unsigned long long int 	UInt64;
typedef int 					BoolInt;
/* SHA-1 geometry: 16 words (64 bytes) per block, 5 words (20 bytes) per digest */
#define SHA1_NUM_BLOCK_WORDS  16
#define SHA1_NUM_DIGEST_WORDS  5
#define SHA1_BLOCK_SIZE   (SHA1_NUM_BLOCK_WORDS * 4)
#define SHA1_DIGEST_SIZE  (SHA1_NUM_DIGEST_WORDS * 4)
/* Signature of a block-compression routine: folds numBlocks 64-byte blocks
   from data into state (shared by Sha1_UpdateBlocks and Sha1_UpdateBlocks_HW) */
typedef void (MY_FAST_CALL *SHA1_FUNC_UPDATE_BLOCKS)(UInt32 state[5], const Byte *data, size_t numBlocks);

/* Streaming SHA-1 context used by the Sha1_* functions */
typedef struct
{
  SHA1_FUNC_UPDATE_BLOCKS func_UpdateBlocks;  /* block routine, installed by Sha1_Init() */
  UInt64 count;                               /* total bytes fed so far; low 6 bits = fill level of buffer */
  UInt64 __pad_2[2];                          /* unused here; presumably layout/alignment padding - confirm */
  UInt32 state[SHA1_NUM_DIGEST_WORDS];        /* current chaining value */
  UInt32 __pad_3[3];                          /* unused here; presumably layout/alignment padding - confirm */
  Byte buffer[SHA1_BLOCK_SIZE];               /* bytes of the not-yet-complete block */
} CSha1;

/* Choose which block routine Sha1_Init() installs (hardware one when true) */
void Sha1Prepare(bool i_flaghardware=false);
/* Reset byte count and chaining value; leaves func_UpdateBlocks untouched */
void Sha1_InitState(CSha1 *p);
/* Install the currently selected block routine, then Sha1_InitState() */
void Sha1_Init(CSha1 *p);
/* Hash size bytes from data */
void Sha1_Update(CSha1 *p, const Byte *data, size_t size);
/* Pad, write the 20-byte digest and reset the state for reuse */
void Sha1_Final			(CSha1 *p, Byte *digest);
/* Write padding and bit length into a caller-owned 64-byte block */
void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size);
/* Digest of p's state extended by one block, without modifying p */
void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest);
/* Portable block routine (defined below) */
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
/* Hardware block routine, defined outside this file section with C linkage */
extern "C" void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
/* Routine copied into new contexts by Sha1_Init(); portable by default */
static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha1_UpdateBlocks;
/* Set by Sha1Prepare(); not read anywhere in this section */
static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;

#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))

#define STEP_PRE  20
#define STEP_MAIN 20
#define kNumW 16
#define w(i) W[(i)&15]

#define w0(i) (W[i] = GetBe32(data + (size_t)(i) * 4))
#define w1(i) (w(i) = rotlFixed(w((size_t)(i)-3) ^ w((size_t)(i)-8) ^ w((size_t)(i)-14) ^ w((size_t)(i)-16), 1))

#define sha1_f0(x,y,z)  ( 0x5a827999 + (z^(x&(y^z))) )
#define sha1_f1(x,y,z)  ( 0x6ed9eba1 + (x^y^z) )
#define sha1_f2(x,y,z)  ( 0x8f1bbcdc + ((x&y)|(z&(x|y))) )
#define sha1_f3(x,y,z)  ( 0xca62c1d6 + (x^y^z) )

#define T5(a,b,c,d,e, fx, ww) \
    e += fx(b,c,d) + ww + rotlFixed(a, 5); \
    b = rotlFixed(b, 30); \

#define M5(i, fx, wx0, wx1) \
    T5 ( a,b,c,d,e, fx, wx0((i)  ) ); \
    T5 ( e,a,b,c,d, fx, wx1((i)+1) ); \
    T5 ( d,e,a,b,c, fx, wx1((i)+2) ); \
    T5 ( c,d,e,a,b, fx, wx1((i)+3) ); \
    T5 ( b,c,d,e,a, fx, wx1((i)+4) ); \

#define R5(i, fx, wx) \
    M5 ( i, fx, wx, wx) \


#if STEP_PRE > 5

  #define R20_START \
    R5 (  0, sha1_f0, w0); \
    R5 (  5, sha1_f0, w0); \
    R5 ( 10, sha1_f0, w0); \
    M5 ( 15, sha1_f0, w0, w1); \

  #elif STEP_PRE == 5

  #define R20_START \
    { size_t i; for (i = 0; i < 15; i += STEP_PRE) \
      { R5(i, sha1_f0, w0); } } \
    M5 ( 15, sha1_f0, w0, w1); \

#else

  #if STEP_PRE == 1
    #define R_PRE R1
  #elif STEP_PRE == 2
    #define R_PRE R2
  #elif STEP_PRE == 4
    #define R_PRE R4
  #endif

  #define R20_START \
    { size_t i; for (i = 0; i < 16; i += STEP_PRE) \
      { R_PRE(i, sha1_f0, w0); } } \
    R4 ( 16, sha1_f0, w1); \

#endif

#if STEP_MAIN > 5

  #define R20(ii, fx) \
    R5 ( (ii)     , fx, w1); \
    R5 ( (ii) + 5 , fx, w1); \
    R5 ( (ii) + 10, fx, w1); \
    R5 ( (ii) + 15, fx, w1); \

#else

  #if STEP_MAIN == 1
    #define R_MAIN R1
  #elif STEP_MAIN == 2
    #define R_MAIN R2
  #elif STEP_MAIN == 4
    #define R_MAIN R4
  #elif STEP_MAIN == 5
    #define R_MAIN R5
  #endif

  #define R20(ii, fx)  \
    { size_t i; for (i = (ii); i < (ii) + 20; i += STEP_MAIN) \
      { R_MAIN(i, fx, w1); } } \

#endif


/* Store v as a native-order 32-bit word at p (p is accessed as a UInt32) */
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
/* Read a 32-bit big-endian value from the 4 bytes at p (byte-wise, any alignment) */
#define GetBe32(p) ( \
    ((UInt32)((const Byte *)(p))[0] << 24) | \
    ((UInt32)((const Byte *)(p))[1] << 16) | \
    ((UInt32)((const Byte *)(p))[2] <<  8) | \
             ((const Byte *)(p))[3] )
/* Write v as 4 big-endian bytes at p; p and v are each evaluated once */
#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
    _ppp_[0] = (Byte)(_vvv_ >> 24); \
    _ppp_[1] = (Byte)(_vvv_ >> 16); \
    _ppp_[2] = (Byte)(_vvv_ >> 8); \
    _ppp_[3] = (Byte)_vvv_; }


/* Start a new message: zero the byte counter and load the SHA-1 initial
   chaining value. The installed block routine is left untouched. */
void Sha1_InitState(CSha1 *p)
{
	static const UInt32 kInitialState[SHA1_NUM_DIGEST_WORDS] =
	{
		0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
	};
	unsigned i;
	for (i = 0; i < SHA1_NUM_DIGEST_WORDS; i++)
		p->state[i] = kInitialState[i];
	p->count = 0;
}

/* Prepare p for hashing: reset the message state and bind the block routine
   currently selected in g_FUNC_UPDATE_BLOCKS. */
void Sha1_Init(CSha1 *p)
{
	Sha1_InitState(p);
	p->func_UpdateBlocks = g_FUNC_UPDATE_BLOCKS;
}
/*
	Portable SHA-1 compression: folds numBlocks consecutive 64-byte blocks
	read from data into state[0..4]. Returns immediately when numBlocks is 0.
*/
MY_NO_INLINE
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks)
{
	UInt32 a, b, c, d, e;
	UInt32 W[kNumW];	/* message schedule, used as a 16-word ring through w(i) */
	if (numBlocks==0)
		return;
	a = state[0];
	b = state[1];
	c = state[2];
	d = state[3];
	e = state[4];
	do
	{
		#if STEP_PRE < 5 || STEP_MAIN < 5
		UInt32 tmp;
		#endif
		/* 80 rounds in four groups of 20; the macros read `data` and update a..e and W */
		R20_START
		R20(20, sha1_f1);
		R20(40, sha1_f2);
		R20(60, sha1_f3);
		/* Add the previous chaining value and publish the new one */
		a += state[0];
		b += state[1];
		c += state[2];
		d += state[3];
		e += state[4];
		state[0] = a;
		state[1] = b;
		state[2] = c;
		state[3] = d;
		state[4] = e;
		data += 64;
	}
	while (--numBlocks);
}
/* Hash size bytes from data. A partial block left in p->buffer is completed
   first, whole blocks are then processed straight from data, and any tail is
   copied into p->buffer for a later call. */
void Sha1_Update(CSha1 *p, const Byte *data, size_t size)
{
	if (size == 0)
		return;

	const unsigned used = (unsigned)p->count & 0x3F;	/* bytes already buffered */
	const unsigned room = 64 - used;					/* bytes missing to a full block */
	p->count += size;

	if (size < room)
	{
		/* Not enough to complete a block: just buffer the input */
		memcpy(p->buffer + used, data, size);
		return;
	}

	if (used != 0)
	{
		/* Complete and hash the buffered block */
		memcpy(p->buffer + used, data, room);
		data += room;
		size -= room;
		p->func_UpdateBlocks(p->state, p->buffer, 1);
	}

	const size_t whole = size >> 6;		/* number of full 64-byte blocks */
	const size_t tail = size & 0x3F;	/* leftover bytes after them */
	p->func_UpdateBlocks(p->state, data, whole);
	if (tail != 0)
		memcpy(p->buffer, data + (whole << 6), tail);
}
/* Finish the message: append 0x80, zero padding and the 64-bit big-endian bit
   length, hash the final block(s), write the 20-byte big-endian digest to
   digest, then reset p's message state so the context can be reused. */
void Sha1_Final(CSha1 *p, Byte *digest)
{
	unsigned fill = (unsigned)p->count & 0x3F;
	unsigned i;

	p->buffer[fill++] = 0x80;
	if (fill > (64 - 8))
	{
		/* No room left for the length field: flush a zero-padded block first */
		memset(p->buffer + fill, 0, 64 - fill);
		p->func_UpdateBlocks(p->state, p->buffer, 1);
		fill = 0;
	}
	memset(p->buffer + fill, 0, (64 - 8) - fill);

	const UInt64 totalBits = (p->count << 3);
	SetBe32(p->buffer + 64 - 8, (UInt32)(totalBits >> 32));
	SetBe32(p->buffer + 64 - 4, (UInt32)(totalBits));
	p->func_UpdateBlocks(p->state, p->buffer, 1);

	for (i = 0; i < SHA1_NUM_DIGEST_WORDS; i++)
	{
		SetBe32(digest + 4 * i, p->state[i]);
	}
	Sha1_InitState(p);
}
void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size)
{
	const UInt64 numBits = (p->count + size) << 3;
	SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 2], (UInt32)(numBits >> 32));
	SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 1], (UInt32)(numBits));
	SetUi32((UInt32 *)(void *)(block + size), 0x80);
	size += 4;
	while (size != (SHA1_NUM_BLOCK_WORDS - 2) * 4)
	{
		*((UInt32 *)(void *)(block + size)) = 0;
		size += 4;
	}
}
void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest)
{
	MY_ALIGN (16)
	UInt32 st[SHA1_NUM_DIGEST_WORDS];
	st[0] = p->state[0];
	st[1] = p->state[1];
	st[2] = p->state[2];
	st[3] = p->state[3];
	st[4] = p->state[4];
	p->func_UpdateBlocks(st, data, 1);
	SetBe32(destDigest + 0    , st[0]);
	SetBe32(destDigest + 1 * 4, st[1]);
	SetBe32(destDigest + 2 * 4, st[2]);
	SetBe32(destDigest + 3 * 4, st[3]);
	SetBe32(destDigest + 4 * 4, st[4]);
}
void Sha1Prepare(bool i_flaghardware)
{
	SHA1_FUNC_UPDATE_BLOCKS f, f_hw;
	f = Sha1_UpdateBlocks;
	if (i_flaghardware)
		f = f_hw = Sha1_UpdateBlocks_HW;
	g_FUNC_UPDATE_BLOCKS    = f;
	g_FUNC_UPDATE_BLOCKS_HW = f_hw;
}
#endif  // HWSHA1
/// LICENSE_END.5

/*
	Section: libzpaq
*/

// Fixed-width unsigned aliases (1, 2, 4 and 8 bytes) used by the
// libzpaq declarations that follow
typedef uint8_t U8;
typedef uint16_t U16;
typedef uint32_t U32;
typedef uint64_t U64;

namespace libzpaq {
// Tables for parsing ZPAQL source code
extern const char* compname[256];    // list of ZPAQL component types
extern const int compsize[256];      // number of bytes to encode a component
extern const char* opcodelist[272];  // list of ZPAQL instructions
// Callback for error handling
extern void error(const char* msg);
// Virtual base classes for input and output
// get() and put() must be overridden to read or write 1 byte.
// read() and write() may be overridden to read or write n bytes more
// efficiently than calling get() or put() n times.
// Abstract byte source. Subclasses must implement get(); read() is virtual
// so it can be replaced with a faster bulk implementation.
class Reader {
public:
  virtual int get() = 0;  // should return 0..255, or -1 at EOF
  virtual int read(char* buf, int n); // read to buf[n], return no. read
  virtual ~Reader() {}    // virtual so deleting through a Reader* is safe
};
// Abstract byte sink. Subclasses must implement put(); write() is virtual
// so it can be replaced with a faster bulk implementation.
class Writer {
public:
  virtual void put(int c) = 0;  // should output low 8 bits of c
  virtual void write(const char* buf, int n);  // write buf[n]
  virtual ~Writer() {}          // virtual so deleting through a Writer* is safe
};
// Read 16 bit little-endian number
int toU16(const char* p);
// Array<T>: zero-filled buffer of T whose element [0] sits on a 64 byte
//   boundary. No element constructors run; copying and assignment are
//   disabled (declared private, not defined).
// Array<T> a(n, ex=0);  - allocates n<<ex elements
// a[i]     - checked index (assert, plus error() when out of range)
// a(i)     - index modulo n; n must be a power of 2
// a.size() - element count n
template <typename T>
class Array {
  T *data;     // aligned address of element [0]
  size_t n;    // number of elements
  int offset;  // bytes from the real allocation start up to data
  void operator=(const Array&);  // assignment disabled
  Array(const Array&);  // copying disabled
public:
  Array(size_t sz=0, int ex=0): data(0), n(0), offset(0) {
    resize(sz, ex);  // elements [0..sz-1] start out as 0
  }
  void resize(size_t sz, int ex=0); // new size, contents reset to zeros
  ~Array() {resize(0);}  // releases the allocation
  size_t size() const {return n;}  // element count
  int isize() const {return int(n);}  // element count as an int
  T& operator[](size_t i) {
    assert(n>0 && i<n);
    if (n==0 || i>=n)
    {
      error("09386: operator[] kaputt");
    }
    return data[i];
  }
  T& operator()(size_t i) {
    assert(n>0 && (n&(n-1))==0);
    const size_t mask=n-1;
    return data[i&mask];
  }
};
// Change size to sz<<ex elements of 0.
// Any previous allocation is freed first, so old contents are never kept.
// sz==0 just releases the memory. Calls error() when the requested size
// overflows size_t or when the allocation fails.
// g_arrayram tracks the bytes currently allocated by all Arrays: the same
// byte count (128+n*sizeof(T)) is added on allocation and subtracted on
// release. The release side used to subtract the element count n instead,
// which made the counter drift upwards.
template<typename T>
void Array<T>::resize(size_t sz, int ex) {
  assert(size_t(-1)>0);  // unsigned type?
  while (ex>0) {
    if (sz>sz*2) error("Array too big");
    sz*=2, --ex;
  }
  if (n>0) {
    assert(offset>0 && offset<=64);
    assert((char*)data-offset);
    ::free((char*)data-offset);
    g_arrayram-=128+n*sizeof(T);  // mirror of the "+=nb" done when allocating
  }
  n=0;
  offset=0;
  if (sz==0) return;
  n=sz;
  const size_t nb=128+n*sizeof(T);  // test for overflow
  if (nb<=128 || (nb-128)/sizeof(T)!=n) n=0, error("Array too big");
  data=(T*)::calloc(nb, 1);
  if (!data) n=0, error("Out of memory");
  g_arrayram+=nb;  // count only allocations that actually succeeded
  // Advance to the next 64 byte boundary (always by 1..64 bytes) and remember
  // the distance so the original pointer can be recovered for free()
  offset=64-(((char*)data-(char*)0)&63);
  assert(offset>0 && offset<=64);
  data=(T*)((char*)data+offset);
}
//////////////////////////// SHA1 ////////////////////////////
// SHA1 code, see http://en.wikipedia.org/wiki/SHA-1
// The private state depends on the build: HWSHA1 uses a CSha1 hasher,
// HWSHA2 uses a SUM_CONTEXT, otherwise the class carries its own w/h state.
#define SHA1CHUNK 64
class SHA1
{
public:
	void put(int c);                          // hash 1 byte
	void write(const char* buf, int64_t n);   // hash buf[0..n-1]
	uint64_t usize() const {return len/8;}    // bytes hashed so far (len counts bits)
	const char* result();                     // finish; returns 20 byte hash and resets
	SHA1() {init();}
private:
#ifdef HWSHA1
	int				bufpos;				// 7-Zip SHA1 is rather different from
	CSha1			myhasher;			// the original 7.15. So I put an input buffer
	unsigned char 	w_hw[SHA1CHUNK];	// Slower, in fact, but it works

#else
	#ifdef HWSHA2
		int				bufpos;				// 7-Zip SHA1 is rather different from
		SUM_CONTEXT sum_ctx;
		unsigned char 	w_hw[SHA1CHUNK];	// Slower, in fact, but it works
	#else		// no HW
		U32 w[16];       /// input block as 16 words (note: can be removed)
		U32 h[5];        /// running hash state (note: can be removed)
	#endif
#endif

	U64 len;           // length of the hashed input in bits
	char hbuf[20];     // storage for the value returned by result()
	void process();    // hash one full block (empty in the HW builds)
	void init();       // reset to the initial state
};
#ifdef HWSHA2

/// This "thing" looks odd and is not very optimized, but it must be a drop-in ("plug-in") replacement
// Reset everything for a new message: hashing context, put() staging
// buffer position and bit counter.
void SHA1::init()
{
	memset(&sum_ctx,0,sizeof(sum_ctx));
	sha1_init(&sum_ctx);
	bufpos=0;
	len=0;
}
// Hash one byte. Bytes are staged in w_hw and handed to sha1_write()
// only once a full SHA1CHUNK has been collected.
void SHA1::put(int c)
{
	w_hw[bufpos]=c;
	++bufpos;
	len+=8;
	if (bufpos!=SHA1CHUNK)
		return;
	sha1_write(&sum_ctx,w_hw,SHA1CHUNK);
	bufpos=0;
}
// Finish the hash: flush bytes still staged by put(), finalize, copy the
// 20 byte digest out of sum_ctx.buf into hbuf and reset for the next message.
const char* SHA1::result()
{
	sha1_write(&sum_ctx,w_hw,bufpos);   // staged put() bytes, possibly none
	sha1_final(&sum_ctx);
	memcpy(hbuf, sum_ctx.buf, sizeof(hbuf));
	init();
	return hbuf;
}
void SHA1::write(const char* buf, int64_t n)
{
	sha1_write(&sum_ctx,(const uint8_t*)buf,n);
	len+=n*8;
}
// Intentionally empty in the HWSHA2 build: blocks are hashed inside
// sha1_write(). Defined only because the class declares process().
void SHA1::process()
{
}

#else



#ifdef HWSHA1
/// This "thing" looks odd and is not very optimized, but it must be a drop-in ("plug-in") replacement
// Reset everything for a new message: 7-Zip hasher, put() staging buffer
// position and bit counter.
void SHA1::init()
{
	Sha1_Init(&myhasher);
	bufpos=0;
	len=0;
}
// Hash one byte. Bytes are staged in w_hw and handed to Sha1_Update()
// only once a full SHA1CHUNK has been collected.
void SHA1::put(int c)
{
	w_hw[bufpos]=c;
	++bufpos;
	len+=8;
	if (bufpos!=SHA1CHUNK)
		return;
	Sha1_Update(&myhasher,(const Byte*)w_hw,SHA1CHUNK);
	bufpos=0;
}
const char* SHA1::result()
{
	Sha1_Update(&myhasher,(const Byte*)w_hw,bufpos);
	Sha1_Final(&myhasher,(Byte*)hbuf);
	init();
	return hbuf;
}
void SHA1::write(const char* buf, int64_t n)
{
	Sha1_Update(&myhasher,(const Byte*)buf,n);
	len+=n*8;
}
// Intentionally empty in the HWSHA1 build: blocks are hashed inside
// Sha1_Update(). Defined only because the class declares process().
void SHA1::process()
{
}
#else
///	zpaq 7.15 uses a very, very, very good implementation of SHA1, but it is full of very dirty tricks
void SHA1::init()
{
	len=0;
	memset(w, 0, sizeof(w));
	h[0]=0x67452301;
	h[1]=0xEFCDAB89;
	h[2]=0x98BADCFE;
	h[3]=0x10325476;
	h[4]=0xC3D2E1F0;
}
void SHA1::put(int c)
{
	U32& r=w[U32(len)>>5&15];
	r=(r<<8)|(c&255);
    len+=8;
	if ((U32(len)&511)==0)
		process();
}
// Finish the hash: append 0x80, zero padding up to 448 bits mod 512 and the
// original bit length as 8 big-endian bytes, then copy the state into hbuf
// (big-endian) and reset for the next message.
const char* SHA1::result()
{
	const U64 s=len;   // message length in bits, captured before padding
	put(0x80);
	while ((len&511)!=448)
		put(0);
	for (int shift=56; shift>=0; shift-=8)
		put(s>>shift);
	for (unsigned int i=0; i<5; ++i)
		for (unsigned int j=0; j<4; ++j)
			hbuf[4*i+j]=h[i]>>(24-8*j);
	init();
	return hbuf;
}
// Hash buf[0..n-1]. Bytes go through put() until the state is on a block
// boundary, whole 64 byte blocks are then loaded straight into w, and the
// remaining tail goes through put() again.
void SHA1::write(const char* buf, int64_t n)
{
  const unsigned char* p=(const unsigned char*) buf;
  while (n>0 && (U32(len)&511)!=0) {
    put(*p++);
    --n;
  }
  while (n>=64) {
    for (unsigned int i=0; i<16; ++i, p+=4)
      w[i]=U32(p[0])<<24|U32(p[1])<<16|U32(p[2])<<8|U32(p[3]);
    len+=512;
    process();
    n-=64;
  }
  while (n>0) {
    put(*p++);
    --n;
  }
}
// Hash the 512 bit block held in w[0..15] into h[0..4].
// The 80 rounds are fully unrolled by the f/r macros below.
void SHA1::process()
{
  U32 a=h[0], b=h[1], c=h[2], d=h[3], e=h[4];
  static const U32 k[4]={0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6};
  // f: one round i. From round 16 on, w is first extended in place (used as
  // a 16 word ring); the round function is chosen by i (0-19, 20-39, 40-59,
  // 60-79) and the constant by k[i/20].
  #define f(a,b,c,d,e,i) \
    if (i>=16) \
      w[(i)&15]^=w[(i-3)&15]^w[(i-8)&15]^w[(i-14)&15], \
      w[(i)&15]=w[(i)&15]<<1|w[(i)&15]>>31; \
    e+=(a<<5|a>>27)+k[(i)/20]+w[(i)&15] \
      +((i)%40>=20 ? b^c^d : i>=40 ? (b&c)|(d&(b|c)) : d^(b&(c^d))); \
    b=b<<30|b>>2;
  // r: five rounds, rotating the roles of a..e instead of moving values
  #define r(i) f(a,b,c,d,e,i) f(e,a,b,c,d,i+1) f(d,e,a,b,c,i+2) \
               f(c,d,e,a,b,i+3) f(b,c,d,e,a,i+4)
  r(0)  r(5)  r(10) r(15) r(20) r(25) r(30) r(35)
  r(40) r(45) r(50) r(55) r(60) r(65) r(70) r(75)
  #undef f
  #undef r
  // Add the block result to the running hash
  h[0]+=a; h[1]+=b; h[2]+=c; h[3]+=d; h[4]+=e;
}
#endif
#endif

//////////////////////////// SHA256 //////////////////////////
// For computing SHA-256 checksums
// http://en.wikipedia.org/wiki/SHA-2
// With HWSHA2 the state lives in a SUM_CONTEXT plus a staging buffer for
// put(); otherwise the class carries its own len/s/w state.


#define SHA2CHUNK 64

class SHA256 {
public:
  void put(int c);                         // hash 1 byte
  void write(const char* buf, int64_t n);  // hash buf[0..n-1]
  const char* result();                    // finish; returns 32 byte hash and resets
  SHA256() {init();}

private:
  void init();         // reset to the initial state
#ifndef HWSHA2
  unsigned len0, len1; /// length in bits, low and high word (can it be removed?)
  unsigned s[8];      /// hash state (note: can be removed)
  unsigned w[16];     /// input block as 16 words (note: can be removed)
#endif
  char hbuf[32];       // storage for the value returned by result()
  void process();      // hash one full block (empty in the HWSHA2 build)
#ifdef HWSHA2
	int				bufpos;            // number of bytes staged in w_hw
	SUM_CONTEXT 	sum_ctx;           // hashing context
	unsigned char 	w_hw[SHA2CHUNK];   // staging buffer filled by put()
#endif

};
#ifdef HWSHA2
//////////////////////////// SHA256 //////////////////////////
// Reset for a new message: fresh hashing context and empty staging buffer.
void SHA256::init()
{
	memset(&sum_ctx,0,sizeof(sum_ctx));
	sha256_init(&sum_ctx);
	bufpos=0;
}

// Hash one byte. Bytes are staged in w_hw and handed to sha256_write()
// only once a full SHA2CHUNK has been collected.
void SHA256::put(int c)
{
	w_hw[bufpos]=c;
	++bufpos;
	if (bufpos!=SHA2CHUNK)
		return;
	sha256_write(&sum_ctx,w_hw,SHA2CHUNK);
	bufpos=0;
}
// Finish the hash: flush bytes still staged by put(), finalize, copy the
// 32 byte digest out of sum_ctx.buf into hbuf and reset for the next message.
const char* SHA256::result()
{
	sha256_write(&sum_ctx,w_hw,bufpos);   // staged put() bytes, possibly none
	sha256_final(&sum_ctx);
	memcpy(hbuf, sum_ctx.buf, sizeof(hbuf));
	init();
	return hbuf;
}
void SHA256::write(const char* buf, int64_t n)
{
	sha256_write(&sum_ctx,(const uint8_t*)buf,n);
}
// Intentionally empty in the HWSHA2 build: blocks are hashed inside
// sha256_write(). Defined only because the class declares process().
void SHA256::process()
{
}

#else

//////////////////////////// SHA256 //////////////////////////
// Hash 1 byte: shift it into the current word of the block buffer, advance
// the 64 bit length kept in len1:len0, and process() every 512 bits.
void SHA256::put(int c)
{
    const unsigned slot=(len0>>5)&15;
    w[slot]=(w[slot]<<8)|(c&255);
    len0+=8;
    if (len0==0)
        ++len1;      // low word wrapped around: carry into the high word
    if ((len0&511)==0)
        process();
}

// Reset to the start of a new message: zero length, cleared block buffer
// and the SHA-256 initial hash values.
void SHA256::init() {
  static const unsigned initial[8]={
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
  for (unsigned int i=0; i<8; ++i)
    s[i]=initial[i];
  memset(w, 0, sizeof(w));
  len0=0;
  len1=0;
}
// Hash the 512 bit block held in w[0..15] into s[0..7].
// The 64 rounds are unrolled eight at a time by the macros below.
void SHA256::process() {
  // ror: 32 bit rotate right
  #define ror(a,b) ((a)>>(b)|(a<<(32-(b))))
  // m: extend the message schedule in place for round i (w is a 16 word ring)
  #define m(i) \
     w[(i)&15]+=w[(i-7)&15] \
       +(ror(w[(i-15)&15],7)^ror(w[(i-15)&15],18)^(w[(i-15)&15]>>3)) \
       +(ror(w[(i-2)&15],17)^ror(w[(i-2)&15],19)^(w[(i-2)&15]>>10))
  // r: one round; only h and d are written, callers rotate the argument order
  #define r(a,b,c,d,e,f,g,h,i) { \
    unsigned t1=ror(e,14)^e; \
    t1=ror(t1,5)^e; \
    h+=ror(t1,6)+((e&f)^(~e&g))+k[i]+w[(i)&15]; } \
    d+=h; \
    {unsigned t1=ror(a,9)^a; \
    t1=ror(t1,11)^a; \
    h+=ror(t1,2)+((a&b)^(c&(a^b))); }
  // mr: schedule extension followed by the round
  #define mr(a,b,c,d,e,f,g,h,i) m(i); r(a,b,c,d,e,f,g,h,i);
  // r8: eight rounds that use the block words as loaded (rounds 0..15)
  #define r8(i) \
    r(a,b,c,d,e,f,g,h,i);   \
    r(h,a,b,c,d,e,f,g,i+1); \
    r(g,h,a,b,c,d,e,f,i+2); \
    r(f,g,h,a,b,c,d,e,i+3); \
    r(e,f,g,h,a,b,c,d,i+4); \
    r(d,e,f,g,h,a,b,c,i+5); \
    r(c,d,e,f,g,h,a,b,i+6); \
    r(b,c,d,e,f,g,h,a,i+7);
  // mr8: eight rounds with schedule extension (rounds 16..63)
  #define mr8(i) \
    mr(a,b,c,d,e,f,g,h,i);   \
    mr(h,a,b,c,d,e,f,g,i+1); \
    mr(g,h,a,b,c,d,e,f,i+2); \
    mr(f,g,h,a,b,c,d,e,i+3); \
    mr(e,f,g,h,a,b,c,d,i+4); \
    mr(d,e,f,g,h,a,b,c,i+5); \
    mr(c,d,e,f,g,h,a,b,i+6); \
    mr(b,c,d,e,f,g,h,a,i+7);
  // Round constants, one per round
  static const unsigned k[64]={
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2};
  // Working copies of the hash state
  unsigned a=s[0];
  unsigned b=s[1];
  unsigned c=s[2];
  unsigned d=s[3];
  unsigned e=s[4];
  unsigned f=s[5];
  unsigned g=s[6];
  unsigned h=s[7];
  r8(0);
  r8(8);
  mr8(16);
  mr8(24);
  mr8(32);
  mr8(40);
  mr8(48);
  mr8(56);
  // Add the block result to the running hash
  s[0]+=a;
  s[1]+=b;
  s[2]+=c;
  s[3]+=d;
  s[4]+=e;
  s[5]+=f;
  s[6]+=g;
  s[7]+=h;
  #undef mr8
  #undef r8
  #undef mr
  #undef r
  #undef m
  #undef ror
}
// Finish the hash: append 0x80, zero padding up to 448 bits mod 512 and the
// original 64 bit length (high word first, big-endian), then copy the state
// into hbuf (big-endian) and reset for the next message.
const char* SHA256::result() {
  const unsigned s1=len1, s0=len0;  // length captured before padding
  put(0x80);
  while ((len0&511)!=448)
    put(0);
  for (int shift=24; shift>=0; shift-=8)
    put(s1>>shift);
  for (int shift=24; shift>=0; shift-=8)
    put(s0>>shift);
  for (unsigned int i=0; i<8; ++i) {
    for (unsigned int j=0; j<4; ++j)
      hbuf[4*i+j]=s[i]>>(24-8*j);
  }
  init();
  return hbuf;
}
// Hash buf[0..n-1] one byte at a time through put(); n<=0 does nothing.
void SHA256::write(const char* buf, int64_t n)
{
	for (const char* p=buf; n>0; --n, ++p)
		put(*p);
}
#endif
//////////////////////////// AES /////////////////////////////
// For encrypting with AES in CTR mode.
// The i'th 16 byte block is encrypted by XOR with AES(i)
// (i is big endian or MSB first, starting with 0).
// Only declarations are given here; the member functions are defined elsewhere.
class AES_CTR {
  U32 Te0[256], Te1[256], Te2[256], Te3[256], Te4[256]; // encryption tables
  U32 ek[60];  // round key
  int Nr;  // number of rounds (10, 12, 14 for AES 128, 192, 256)
  U32 iv0, iv1;  // first 8 bytes in CTR mode
public:
  AES_CTR(const char* key, int keylen, const char* iv=0);
    // Schedule: keylen is 16, 24, or 32, iv is 8 bytes or NULL
  // Low-level form: takes four 32 bit words and an output pointer ct
  void encrypt(U32 s0, U32 s1, U32 s2, U32 s3, unsigned char* ct);
  void encrypt(char* buf, int n, U64 offset);  // encrypt n bytes of buf
};//////////////////////////// stretchKey //////////////////////
// Strengthen password pw[0..pwlen-1] and salt[0..saltlen-1]
// to produce key buf[0..buflen-1]. Uses O(n*r*p) time and 128*r*n bytes
// of memory. n must be a power of 2 and r <= 8.
void scrypt(const char* pw, int pwlen,
            const char* salt, int saltlen,
            int n, int r, int p, char* buf, int buflen);
// Generate a strong key out[0..31] key[0..31] and salt[0..31].
// Calls scrypt(key, 32, salt, 32, 16384, 8, 1, out, 32);
void stretchKey(char* out, const char* key, const char* salt);
//////////////////////////// random //////////////////////////
// Fill buf[0..n-1] with n cryptographic random bytes. The first
// byte is never '7' or 'z'.
void random(char* buf, int n);
//////////////////////////// ZPAQL ///////////////////////////
// Symbolic constants, instruction size, and names
typedef enum {NONE,CONS,CM,ICM,MATCH,AVG,MIX2,MIX,ISSE,SSE} CompType;
extern const int compsize[256];
class Decoder;  // forward
// A ZPAQL machine COMP+HCOMP or PCOMP.
// Holds the block header (program bytes), the machine state (registers and
// the H, M, R arrays) and an output buffer that is flushed to `output` and
// `sha1`.
class ZPAQL {
public:
  ZPAQL();
  ~ZPAQL();
  void clear();           // Free memory, erase program, reset machine state
  void inith();           // Initialize as HCOMP to run
  void initp();           // Initialize as PCOMP to run
  double memory();        // Return memory requirement in bytes
  void run(U32 input);    // Execute with input
  int read(Reader* in2);  // Read header
  bool write(Writer* out2, bool pp); // If pp write PCOMP else HCOMP header
  int step(U32 input, int mode);  // Trace execution (defined externally)
  Writer* output;         // Destination for OUT instruction, or 0 to suppress
  SHA1* sha1;             // Points to checksum computer
  U32 H(int i) {return h(i);}  // get element of h
  void flush();           // write outbuf[0..bufptr-1] to output and sha1
  // Buffer one output byte; flush when the buffer fills up, or immediately
  // (without storing anything) when ch is negative
  void outc(int ch) {     // output byte ch (0..255) or -1 at EOS
    if (ch<0 || (outbuf[bufptr]=ch, ++bufptr==outbuf.isize())) flush();
  }
  // ZPAQ1 block header
  Array<U8> header;   // hsize[2] hh hm ph pm n COMP (guard) HCOMP (guard)
  int cend;           // COMP in header[7...cend-1]
  int hbegin, hend;   // HCOMP/PCOMP in header[hbegin...hend-1]
private:
  // Machine state for executing HCOMP
  Array<U8> m;        // memory array M for HCOMP
  Array<U32> h;       // hash array H for HCOMP
  Array<U32> r;       // 256 element register array
  Array<char> outbuf; // output buffer
  int bufptr;         // number of bytes in outbuf
  U32 a, b, c, d;     // machine registers
  int f;              // condition flag
  int pc;             // program counter
  int rcode_size;     // length of rcode
  U8* rcode;          // JIT code for run()
  // Support code
  int assemble();  // put JIT code in rcode
  void init(int hbits, int mbits);  // initialize H and M sizes
  int execute();  // interpret 1 instruction, return 0 after HALT, else 1
  void run0(U32 input);  // default run() if not JIT
  // Division and modulo on register a; a divisor of 0 yields a=0
  void zdiv(U32 x) {if (x) a/=x; else a=0;}
  void mod(U32 x) {if (x) a%=x; else a=0;}
  // Exchange register a with x using XOR
  void swap(U32& x) {a^=x; x^=a; a^=x;}
  void swap(U8& x)  {a^=x; x^=a; a^=x;}
  void err();  // exit with run time error
};
///////////////////////// Component //////////////////////////
// A Component is a context model, indirect context model, match model,
// fixed weight mixer, adaptive 2 input mixer without or with current
// partial byte as context, adaptive m input mixer (without or with),
// or SSE (without or with).
// One struct serves every component type; which fields are meaningful
// depends on the type.
struct Component {
  size_t limit;   // max count for cm
  size_t cxt;     // saved context
  size_t a, b, c; // multi-purpose variables
  Array<U32> cm;  // cm[cxt] -> p in bits 31..10, n in 9..0; MATCH index
  Array<U8> ht;   // ICM/ISSE hash table[0..size1][0..15] and MATCH buf
  Array<U16> a16; // MIX weights
  void init();    // initialize to all 0
  Component() {init();}  // a new component starts out initialized
};
////////////////////////// StateTable ////////////////////////
// Next state table
// ns holds 4 bytes per state (256 states); the constructor is defined
// elsewhere and is expected to fill the table.
class StateTable {
public:
  U8 ns[1024]; // state*4 -> next state if 0, if 1, n0, n1
  // Look up entry y (0..3) of the given state
  int next(int state, int y) {  // next state for bit y
    assert(state>=0 && state<256);
    assert(y>=0 && y<4);
    return ns[state*4+y];
  }
  // Computed from the n0/n1 entries of the state: (2*n1+1)*2^22/(n0+n1+1)
  int cminit(int state) {  // initial probability of 1 * 2^23
    assert(state>=0 && state<256);
    return ((ns[state*4+3]*2+1)<<22)/(ns[state*4+2]+ns[state*4+3]+1);
  }
  StateTable();
};
///////////////////////// Predictor //////////////////////////
// A predictor guesses the next bit
// It is bound to a ZPAQL machine (z), which supplies the block header and
// the context hashes, and owns up to 256 Components plus lookup tables.
class Predictor {
public:
  Predictor(ZPAQL&);
  ~Predictor();
  void init();          // build model
  int predict();        // probability that next bit is a 1 (0..4095)
  void update(int y);   // train on bit y (0..1)
  int stat(int);        // Defined externally
  // True when header byte 6 (the component count n in the ZPAQ1 header
  // layout) is nonzero
  bool isModeled() {    // n>0 components?
    assert(z.header.isize()>6);
    return z.header[6]!=0;
  }
private:
  // Predictor state
  int c8;               // last 0...7 bits.
  int hmap4;            // c8 split into nibbles
  int p[256];           // predictions
  U32 h[256];           // unrolled copy of z.h
  ZPAQL& z;             // VM to compute context hashes, includes H, n
  Component comp[256];  // the model, includes P
  bool initTables;      // are tables initialized?
  // Modeling support functions
  int predict0();       // default
  void update0(int y);  // default
  int dt2k[256];        // division table for match: dt2k[i] = 2^12/i
  int dt[1024];         // division table for cm: dt[i] = 2^16/(i+1.5)
  U16 squasht[4096];    // squash() lookup table
  short stretcht[32768];// stretch() lookup table
  StateTable st;        // next, cminit functions
  U8* pcode;            // JIT code for predict() and update()
  int pcode_size;       // length of pcode
  // reduce prediction error in cr.cm
  // The entry at cr.cxt keeps a count in its low 10 bits; the adjustment is
  // scaled by dt[count] and masked so that it leaves those bits alone, and
  // the count is incremented while it is below cr.limit.
  void train(Component& cr, int y) {
    assert(y==0 || y==1);
    U32& pn=cr.cm(cr.cxt);
    U32 count=pn&0x3ff;
    int error=y*32767-(cr.cm(cr.cxt)>>17);
    pn+=(error*dt[count]&-1024)+(count<cr.limit);
  }
  // x -> floor(32768/(1+exp(-x/64)))
  // Table lookup; valid for x in -2048..2047 once the tables are initialized
  int squash(int x) {
    assert(initTables);
    assert(x>=-2048 && x<=2047);
    return squasht[x+2048];
  }
  // x -> round(64*log((x+0.5)/(32767.5-x))), approx inverse of squash
  // Table lookup; valid for x in 0..32767 once the tables are initialized
  int stretch(int x) {
    assert(initTables);
    assert(x>=0 && x<=32767);
    return stretcht[x];
  }
  // bound x to a 12 bit signed int
  int clamp2k(int x) {
    if (x<-2048) return -2048;
    else if (x>2047) return 2047;
    else return x;
  }
  // bound x to a 20 bit signed int
  int clamp512k(int x) {
    if (x<-(1<<19)) return -(1<<19);
    else if (x>=(1<<19)) return (1<<19)-1;
    else return x;
  }
  // Get cxt in ht, creating a new row if needed
  size_t find(Array<U8>& ht, int sizebits, U32 cxt);
  // Put JIT code in pcode
  int assemble_p();
};
//////////////////////////// Decoder /////////////////////////
// Decoder decompresses using an arithmetic code
// It is itself a Reader: get() serves bytes from an internal buffer that
// is refilled from `in`.
class Decoder: public Reader {
public:
  Reader* in;        // source of the compressed input (read by get()); may be 0
  Decoder(ZPAQL& z);
  int decompress();  // return a byte or EOF
  int skip();        // skip to the end of the segment, return next byte
  void init();       // initialize at start of block
  int stat(int x) {return pr.stat(x);}
  // When the buffer is exhausted, refill it with up to BUFSIZE bytes from
  // `in` (nothing when in is 0); returns -1 once no more bytes are available
  int get() {        // return 1 byte of buffered input or EOF
    if (rpos==wpos) {
      rpos=0;
      wpos=in ? in->read(&buf[0], BUFSIZE) : 0;
      assert(wpos<=BUFSIZE);
    }
    return rpos<wpos ? U8(buf[rpos++]) : -1;
  }
  int buffered() {return wpos-rpos;}  // how far read ahead?
private:
  U32 low, high;     // range
  U32 curr;          // last 4 bytes of archive or remaining bytes in subblock
  U32 rpos, wpos;    // read, write position in buf
  Predictor pr;      // to get p
  enum {BUFSIZE=1<<16};
  Array<char> buf;   // input buffer of size BUFSIZE bytes
  int decode(int p); // return decoded bit (0..1) with prob. p (0..65535)
};
/////////////////////////// PostProcessor ////////////////////
// PostProcessor consumes bytes one at a time through write() and tracks a
// small parse state machine. It owns the ZPAQL machine z that holds the
// PCOMP program; output and SHA1 sinks are forwarded to that machine.
class PostProcessor {
  int state;   // input parse state: 0=INIT, 1=PASS, 2..4=loading, 5=POST
  int hsize;   // header size
  int ph, pm;  // sizes of H and M in z
public:
  ZPAQL z;     // holds PCOMP
  // Start in state 0 (INIT) with all sizes zero
  PostProcessor(): state(0), hsize(0), ph(0), pm(0) {}
  void init(int h, int m);  // ph, pm sizes of H and M
  int write(int c);  // Input a byte, return state
  int getState() const {return state;}  // current parse state (see state above)
  void setOutput(Writer* out) {z.output=out;}  // stored in z.output
  void setSHA1(SHA1* sha1ptr) {z.sha1=sha1ptr;}  // stored in z.sha1
};
//////////////////////// Decompresser ////////////////////////
// For decompression and listing archive contents.
// Bundles a ZPAQL model (z), a Decoder built on that model (dec) and a
// PostProcessor (pp). Most public methods are thin forwards to one of them.
class Decompresser {
public:
  // dec is constructed from z, so z must stay declared before dec below.
  // Initial expectation is a BLOCK, and the first segment of it.
  Decompresser(): z(), dec(z), pp(), state(BLOCK), decode_state(FIRSTSEG) {}
  void setInput(Reader* in) {dec.in=in;}  // input is read through the decoder
  bool findBlock(double* memptr = 0);
  void hcomp(Writer* out2) {z.write(out2, false);}  // write z's header including COMP
  bool findFilename(Writer* = 0);
  void readComment(Writer* = 0);
  void setOutput(Writer* out) {pp.setOutput(out);}  // forwarded to the postprocessor
  void setSHA1(SHA1* sha1ptr) {pp.setSHA1(sha1ptr);}  // forwarded to the postprocessor
  bool decompress(int n = -1);  // n bytes, -1=all, return true until done
  bool pcomp(Writer* out2) {return pp.z.write(out2, true);}  // write PCOMP only; false if absent
  void readSegmentEnd(char* sha1string = 0);
  int stat(int x) {return dec.stat(x);}  // forwarded to the decoder
  int buffered() {return dec.buffered();}  // bytes the decoder has read ahead
private:
  ZPAQL z;
  Decoder dec;
  PostProcessor pp;
  enum {BLOCK, FILENAME, COMMENT, DATA, SEGEND} state;  // expected next
  enum {FIRSTSEG, SEG, SKIP} decode_state;  // which segment in block?
};
/////////////////////////// decompress() /////////////////////
void decompress(Reader* in, Writer* out);
//////////////////////////// Encoder /////////////////////////
// Encoder compresses using an arithmetic code.
// The caller must set `out` before compressing; the constructor leaves it NULL.
class Encoder {
public:
  // Start with no output, range [1, 0xFFFFFFFF], and a predictor bound to z
  Encoder(ZPAQL& z):
    out(0), low(1), high(0xFFFFFFFF), pr(z) {}
  void init();
  void compress(int c);  // c is 0..255 or EOF
  int stat(int x) {return pr.stat(x);}  // forwards to the predictor's stat()
  Writer* out;  // destination
private:
  U32 low, high; // range
  Predictor pr;  // to get p
  Array<char> buf; // unmodeled input
  void encode(int y, int p); // encode bit y (0..1) with prob. p (0..65535)
};
//////////////////////////// Compiler ////////////////////////
// Input ZPAQL source code with args and store the compiled code
// in hz and pz and write pcomp_cmd to out2.
// All the work is driven from the constructor; the class exposes no other
// public members.
class Compiler {
public:
  Compiler(const char* in, int* args, ZPAQL& hz, ZPAQL& pz, Writer* out2);
private:
  const char* in;  // ZPAQL source code
  int* args;       // Array of up to 9 args, default NULL = all 0
  ZPAQL& hz;       // Output of COMP and HCOMP sections
  ZPAQL& pz;       // Output of PCOMP section
  Writer* out2;    // Output ... of "PCOMP ... ;"
  int line;        // Input line number for reporting errors
  int state;       // parse state: 0=space -1=word >0 (nest level)
  // Symbolic constants
  // Values 0..9 name component types, JT..LJ carry explicit numeric values,
  // and everything from POST (256) upward lies outside the 0..255 byte range.
  typedef enum {NONE,CONS,CM,ICM,MATCH,AVG,MIX2,MIX,ISSE,SSE,
    JT=39,JF=47,JMP=63,LJ=255,
    POST=256,PCOMP,END,IF,IFNOT,ELSE,ENDIF,DO,
    WHILE,UNTIL,FOREVER,IFL,IFNOTL,ELSEL,SEMICOLON} CompType;
  void syntaxError(const char* msg, const char* expected=0); // error()
  void next();                     // advance in to next token
  bool matchToken(const char* tok);// in==token?
  int rtoken(int low, int high);   // return token which must be in range
  int rtoken(const char* list[]);  // return token by position in list
  void rtoken(const char* s);      // return token which must be s
  int compile_comp(ZPAQL& z);      // compile either HCOMP or PCOMP
  // Stack of n elements
  // Fixed capacity: push() on a full stack and pop() on an empty one both
  // call error() instead of growing or returning a sentinel.
  class Stack {
    libzpaq::Array<U16> s;
    size_t top;    // number of elements currently stored
  public:
    Stack(int n): s(n), top(0) {}
    void push(const U16& x) {
      if (top>=s.size()) error("IF or DO nested too deep");
      s[top++]=x;
    }
    U16 pop() {
      if (top<=0) error("unmatched IF or DO");
      return s[--top];
    }
  };
  Stack if_stack, do_stack;
};
//////////////////////// Compressor //////////////////////////
// Compressor drives an Encoder over a ZPAQL model (z) and keeps a second
// ZPAQL machine (pz) for the postprocessor. Progress through an archive is
// tracked in `state`.
class Compressor {
public:
  // enc is bound to z; no input yet, state INIT, verification off
  Compressor(): enc(z), in(0), state(INIT), verify(false) {}
  void setOutput(Writer* out) {enc.out=out;}  // output goes through the encoder
  void writeTag();
  void startBlock(int level);  // level=1,2,3
  void startBlock(const char* hcomp);     // ZPAQL byte code
  void startBlock(const char* config,     // ZPAQL source code
                  int* args,              // NULL or int[9] arguments
                  Writer* pcomp_cmd = 0); // retrieve preprocessor command
#ifdef DEBUG
  // Only available in DEBUG builds
  void setVerify(bool v) {verify = v;}    // check postprocessing?
#endif
  void hcomp(Writer* out2) {z.write(out2, false);}  // write z's header including COMP
  bool pcomp(Writer* out2) {return pz.write(out2, true);}  // write PCOMP only; false if absent
  void startSegment(const char* filename = 0, const char* comment = 0);
  void setInput(Reader* i) {in=i;}
  void postProcess(const char* pcomp = 0, int len = 0);  // byte code
  bool compress(int n = -1);  // n bytes, -1=all, return true until done
  void endSegment(const char* sha1string = 0);
#ifdef DEBUG
  // Only available in DEBUG builds
  char* endSegmentChecksum(int64_t* size = 0, bool dosha1=true);
#endif
  void endBlock();
  int stat(int x) {return enc.stat(x);}  // forwarded to the encoder
private:
  ZPAQL z, pz;  // model and test postprocessor
  Encoder enc;  // arithmetic encoder containing predictor
  Reader* in;   // input source
  SHA1 sha1;    // to test pz output
///  char sha1result[20];  // sha1 output
  enum {INIT, BLOCK1, SEG1, BLOCK2, SEG2} state;
  bool verify;  // if true then test by postprocessing
};
/////////////////////////// StringBuffer /////////////////////
// For (de)compressing to/from a string. Writing appends bytes
// which can be later read.
// The buffer grows on demand (see lengthen()) up to `limit` bytes and keeps
// independent read and write positions. Copying is disabled.
class StringBuffer: public libzpaq::Reader, public libzpaq::Writer {
  unsigned char* p;  // allocated memory, not NUL terminated, may be NULL
  size_t al;         // number of bytes allocated, 0 iff p is NULL
  size_t wpos;       // index of next byte to write, wpos <= al
  size_t rpos;       // index of next byte to read, rpos < wpos or return EOF.
  size_t limit;      // max size, default = -1
  const size_t init; // initial size on first use after reset
  // Increase capacity to a without changing size
  // Calls error() if the allocation fails. Never shrinks.
  void reserve(size_t a) {
    assert(!al==!p);
    if (a<=al) return;
	///g_allocatedram+=a;
    // NOTE(review): the counter is not incremented here although the
    // destructor and reset() subtract al from it; presumably franz_malloc /
    // franz_extend do the accounting -- confirm.
    unsigned char* q=0;
    if (a>0) q=(unsigned char*)(p ? franz_extend(p,a,al) : franz_malloc(a));
    if (a>0 && !q) error("Out of memory");
    p=q;
    al=a;
  }
  // Enlarge al to make room to write at least n bytes.
  // Calls error() if that would exceed limit or overflow size_t.
  void lengthen(size_t n) {
    assert(wpos<=al);
    if (wpos+n>limit || wpos+n<wpos) error("StringBuffer overflow");
    if (wpos+n<=al) return;
    size_t a=al;
    while (wpos+n>=a) a=a*2+init;  // grow geometrically, at least init bytes
    reserve(a);
  }
  // No assignment or copy
  void operator=(const StringBuffer&);
  StringBuffer(const StringBuffer&);
public:
  // Direct access to data
  unsigned char* data() {assert(p || wpos==0); return p;}
  // Allocate no memory initially
  // n is the initial capacity used on first write; values below 128 are
  // raised to 128.
  StringBuffer(size_t n=0):
      p(0), al(0), wpos(0), rpos(0), limit(size_t(-1)), init(n>128?n:128) {}
  // Set output limit
  void setLimit(size_t n) {limit=n;}
  // Free memory
  ~StringBuffer() {if (p) {franz_free(p);g_allocatedram-=al;}}
  // Return number of bytes written.
  size_t size() const {return wpos;}
  // Return number of bytes left to read
  ///size_t remaining() const {return wpos-rpos;}
  // Reset size to 0 and free memory.
  // limit and init are left unchanged.
  void reset() {
	  
    if (p) 
	{franz_free(p);
	g_allocatedram-=al;
	}
    p=0;
    al=rpos=wpos=0;
  }
  // Write a single byte.
  void put(int c) {  // write 1 byte
    lengthen(1);
    assert(p);
    assert(wpos<al);
    p[wpos++]=c;
    assert(wpos<=al);
  }
  // Write buf[0..n-1]. If buf is NULL then advance write pointer only.
  // Does nothing when n < 1.
  void write(const char* buf, int n) {
    if (n<1) return;
    lengthen(n);
    assert(p);
    assert(wpos+n<=al);
    if (buf) memcpy(p+wpos, buf, n);
    wpos+=n;
  }
  // Read a single byte. Return EOF (-1) at end.
  int get() {
    assert(rpos<=wpos);
    assert(rpos==wpos || p);
    return rpos<wpos ? p[rpos++] : -1;
  }
  // Read up to n bytes into buf[0..] or fewer if EOF is first.
  // Return the number of bytes actually read.
  // If buf is NULL then advance read pointer without reading.
  int read(char* buf, int n) {
    assert(rpos<=wpos);
    assert(wpos<=al);
    assert(!al==!p);
    if (rpos+n>wpos) n=wpos-rpos;  // clip the request to what is available
    if (n>0 && buf) memcpy(buf, p+rpos, n);
    rpos+=n;
    return n;
  }
  // Return the entire string as a read-only array.
  // The array is not NUL terminated and is NULL when nothing is allocated.
  const char* c_str() const {return (const char*)p;}
  // Truncate the string to size i.
  // The read position is pulled back if it would end up past the new size.
  // i is not checked against the allocated size here.
  void resize(size_t i) {
    wpos=i;
    if (rpos>wpos) rpos=wpos;
  }
  // Swap efficiently (init is not swapped)
  void swap(StringBuffer& s) {
    std::swap(p, s.p);
    std::swap(al, s.al);
    std::swap(wpos, s.wpos);
    std::swap(rpos, s.rpos);
    std::swap(limit, s.limit);
  }
};


/////////////////////////// compress() ///////////////////////
// Compress in to out in multiple blocks. Default method is "14,128,0"
// Default filename is "". Comment is appended to input size.
// dosha1 means save the SHA-1 checksum.
void compress(Reader* in, Writer* out, const char* method,
     const char* filename=0, const char* comment=0, bool dosha1=true);
// Same as compress() but output is 1 block, ignoring block size parameter.
void compressBlock(StringBuffer* in, Writer* out, const char* method,
     const char* filename=0, const char* comment=0, bool dosha1=true);
// Decode the unsigned 16 bit little-endian value stored in p[0..1].
// p[0] is the low byte and p[1] the high byte; the result is 0..65535.
int toU16(const char* p) {
  const int lo=p[0]&255;
  const int hi=p[1]&255;
  return lo+(hi<<8);
}
// Default read() and write()
// Default bulk read built on get(): copy bytes into buf until n bytes have
// been stored or get() reports a negative value (EOF).
// Returns the number of bytes stored. get() is not called once n is reached.
int Reader::read(char* buf, int n) {
  int count=0;
  for (; count<n; ++count) {
    const int ch=get();
    if (ch<0) break;
    buf[count]=ch;
  }
  return count;
}
// Default bulk write built on put(): emit buf[0..n-1] one byte at a time,
// each passed as an unsigned 8 bit value. Does nothing when n <= 0.
void Writer::write(const char* buf, int n) {
  int pos=0;
  while (pos<n) {
    put(U8(buf[pos]));
    ++pos;
  }
}


///////////////////////// allocx //////////////////////
// Allocate newsize > 0 bytes of executable memory, update p to point to
// it and set n = newsize. Any previously allocated memory is released
// first. If newsize is 0 (or negative) then free only.
// On allocation failure a message is printed and the process exits.
// If the run-time flag flagnojit is set, newsize is ignored and p=0, n=0
// are set without allocating or freeing anything.
void allocx(U8* &p, int &n, int newsize) {
	// JIT disabled: report "no executable buffer" and do nothing else.
	if (flagnojit)
	{
		p=0;
		n=0;
		return;
	}

	// Release the previous mapping, if any, and undo its RAM accounting.
	if (p || n)
	{
		if (p)
		{
#ifdef unix
			munmap(p, n);
#else // Windows
			VirtualFree(p, 0, MEM_RELEASE);
#endif
		}
		g_allocatedram-=n;
		p=0;
		n=0;
	}

	// Free-only request.
	if (newsize<=0)
		return;

#ifdef unix
	/// PROT_EXEC can be stopped
	p=(U8*)mmap(0, newsize, PROT_READ|PROT_WRITE|PROT_EXEC,MAP_PRIVATE|MAP_ANON, -1, 0);
	if ((void*)p==MAP_FAILED)
	{
		if (flagdebug3)
			myprintf("10253$ MAP FAILED!\n");
		p=0;  // normalize the failure marker to NULL for the check below
	}
#else
	p=(U8*)VirtualAlloc(0, newsize, MEM_RESERVE|MEM_COMMIT,PAGE_EXECUTE_READWRITE);
#endif

	if (!p)
	{
		// NOTE(review): this failure path terminates with exit status 0.
		n=0;
		myprintf("\n\n");
		myprintf("02249! allocx KO for %s (maybe non-Intel CPU? Unsupported OS?)\n",migliaia(newsize));
		myprintf("02250! try to run with -nojit or compile using -DNOJIT\n\n");
		color_restore();
		exit(0);
	}

	g_allocatedram+=newsize;
	n=newsize;
}

/// LICENSE_START.1
//////////////////////////// AES /////////////////////////////
// Some AES code is derived from libtomcrypt 1.17 (public domain).
// Te4_k selects byte lane k of a Te4 entry. Each macro supplies only the
// "mask & Te4" prefix, so Te4_3[i] expands to 0xFF000000 & Te4[i].
// Always wrap uses in parentheses: the expansion is not parenthesized.
#define Te4_0 0x000000FF & Te4
#define Te4_1 0x0000FF00 & Te4
#define Te4_2 0x00FF0000 & Te4
#define Te4_3 0xFF000000 & Te4
// Return byte n of x as a value in 0..255, where byte 0 is the least
// significant byte.
static inline unsigned byte(unsigned x, unsigned n) {
  const unsigned shift=n*8;
  return (x>>shift)&0xFFu;
}
// x = y[0..3] MSB first
// Each byte is widened to U32 before shifting. Without that, the unsigned
// char operands are promoted to (signed) int and a first byte >= 0x80 would
// be shifted into the sign bit.
static inline void LOAD32H(U32& x, const char* y) {
  const unsigned char* u=(const unsigned char*)y;
  x=U32(u[0])<<24|U32(u[1])<<16|U32(u[2])<<8|U32(u[3]);
}
// Store x into y[0..3], most significant byte first. x is not modified.
static inline void STORE32H(U32& x, unsigned char* y) {
  for (int k=0; k<4; ++k)
    y[k]=x>>(24-8*k);  // the assignment keeps only the low 8 bits
}
// Key schedule mixing of one word: bytes 2,1,0,3 of temp are looked up in
// Te4 and the results are placed in byte lanes 3,2,1,0 respectively.
// temp is evaluated four times, so pass an expression without side effects.
#define setup_mix(temp) \
  ((Te4_3[byte(temp, 2)]) ^ (Te4_2[byte(temp, 1)]) ^ \
   (Te4_1[byte(temp, 0)]) ^ (Te4_0[byte(temp, 3)]))
// Initialize encryption tables and round key. keylen is 16, 24, or 32.
// key must hold keylen bytes. iv, if not NULL, must hold at least 8 bytes,
// which are loaded big-endian into iv0 and iv1; a NULL iv means all zero.
AES_CTR::AES_CTR(const char* key, int keylen, const char* iv) {
  assert(key  != NULL);
  assert(keylen==16 || keylen==24 || keylen==32);
  // Initialize IV (default 0)
  iv0=iv1=0;
  if (iv) {
    LOAD32H(iv0, iv);
    LOAD32H(iv1, iv+4);
  }
  // Initialize encryption tables
  // For each byte value i: s1 is the S-box entry (taken from the 256 byte
  // string literal below), s2 is s1 doubled with reduction by 0x11b when it
  // overflows 8 bits, and s3 is s1^s2. Te0..Te3 hold the same four bytes
  // {s2,s1,s1,s3} in four rotations; Te4 holds s1 replicated in every lane.
  for (unsigned int i=0; i<256; ++i) {
    unsigned s1=
    "\x63\x7c\x77\x7b\xf2\x6b\x6f\xc5\x30\x01\x67\x2b\xfe\xd7\xab\x76"
    "\xca\x82\xc9\x7d\xfa\x59\x47\xf0\xad\xd4\xa2\xaf\x9c\xa4\x72\xc0"
    "\xb7\xfd\x93\x26\x36\x3f\xf7\xcc\x34\xa5\xe5\xf1\x71\xd8\x31\x15"
    "\x04\xc7\x23\xc3\x18\x96\x05\x9a\x07\x12\x80\xe2\xeb\x27\xb2\x75"
    "\x09\x83\x2c\x1a\x1b\x6e\x5a\xa0\x52\x3b\xd6\xb3\x29\xe3\x2f\x84"
    "\x53\xd1\x00\xed\x20\xfc\xb1\x5b\x6a\xcb\xbe\x39\x4a\x4c\x58\xcf"
    "\xd0\xef\xaa\xfb\x43\x4d\x33\x85\x45\xf9\x02\x7f\x50\x3c\x9f\xa8"
    "\x51\xa3\x40\x8f\x92\x9d\x38\xf5\xbc\xb6\xda\x21\x10\xff\xf3\xd2"
    "\xcd\x0c\x13\xec\x5f\x97\x44\x17\xc4\xa7\x7e\x3d\x64\x5d\x19\x73"
    "\x60\x81\x4f\xdc\x22\x2a\x90\x88\x46\xee\xb8\x14\xde\x5e\x0b\xdb"
    "\xe0\x32\x3a\x0a\x49\x06\x24\x5c\xc2\xd3\xac\x62\x91\x95\xe4\x79"
    "\xe7\xc8\x37\x6d\x8d\xd5\x4e\xa9\x6c\x56\xf4\xea\x65\x7a\xae\x08"
    "\xba\x78\x25\x2e\x1c\xa6\xb4\xc6\xe8\xdd\x74\x1f\x4b\xbd\x8b\x8a"
    "\x70\x3e\xb5\x66\x48\x03\xf6\x0e\x61\x35\x57\xb9\x86\xc1\x1d\x9e"
    "\xe1\xf8\x98\x11\x69\xd9\x8e\x94\x9b\x1e\x87\xe9\xce\x55\x28\xdf"
    "\x8c\xa1\x89\x0d\xbf\xe6\x42\x68\x41\x99\x2d\x0f\xb0\x54\xbb\x16"
    [i]&255;
    unsigned s2=s1<<1;
    if (s2>=0x100) s2^=0x11b;
    unsigned s3=s1^s2;
    Te0[i]=s2<<24|s1<<16|s1<<8|s3;
    Te1[i]=s3<<24|s2<<16|s1<<8|s1;
    Te2[i]=s1<<24|s3<<16|s2<<8|s1;
    Te3[i]=s1<<24|s1<<16|s3<<8|s2;
    Te4[i]=s1<<24|s1<<16|s1<<8|s1;
  }
  // setup the forward key
  Nr = 10 + ((keylen/8)-2)*2;  // 10, 12, or 14 rounds
  int i = 0;
  U32* rk = &ek[0];  // sliding window over the expanded key ek
  U32 temp;
  static const U32 rcon[10] = {
    0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
    0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL,
    0x1B000000UL, 0x36000000UL};  // round constants
  // The first four key words are common to all key lengths
  LOAD32H(rk[0], key   );
  LOAD32H(rk[1], key +  4);
  LOAD32H(rk[2], key +  8);
  LOAD32H(rk[3], key + 12);
  if (keylen == 16) {
    // 10 passes, each deriving 4 new words from the previous 4
    for (;;) {
      temp  = rk[3];
      rk[4] = rk[0] ^ setup_mix(temp) ^ rcon[i];
      rk[5] = rk[1] ^ rk[4];
      rk[6] = rk[2] ^ rk[5];
      rk[7] = rk[3] ^ rk[6];
      if (++i == 10) {
         break;
      }
      rk += 4;
    }
  }
  else if (keylen == 24) {
    LOAD32H(rk[4], key + 16);
    LOAD32H(rk[5], key + 20);
    // 8 passes over a 6 word window; the last pass stops after 4 words
    for (;;) {
      temp = rk[5];
      rk[ 6] = rk[ 0] ^ setup_mix(temp) ^ rcon[i];
      rk[ 7] = rk[ 1] ^ rk[ 6];
      rk[ 8] = rk[ 2] ^ rk[ 7];
      rk[ 9] = rk[ 3] ^ rk[ 8];
      if (++i == 8) {
        break;
      }
      rk[10] = rk[ 4] ^ rk[ 9];
      rk[11] = rk[ 5] ^ rk[10];
      rk += 6;
    }
  }
  else if (keylen == 32) {
    LOAD32H(rk[4], key + 16);
    LOAD32H(rk[5], key + 20);
    LOAD32H(rk[6], key + 24);
    LOAD32H(rk[7], key + 28);
    // 7 passes over an 8 word window; the last pass stops after 4 words
    for (;;) {
      temp = rk[7];
      rk[ 8] = rk[ 0] ^ setup_mix(temp) ^ rcon[i];
      rk[ 9] = rk[ 1] ^ rk[ 8];
      rk[10] = rk[ 2] ^ rk[ 9];
      rk[11] = rk[ 3] ^ rk[10];
      if (++i == 7) {
        break;
      }
      temp = rk[11];
      // second half: temp is rotated right by 8 bits first and no round
      // constant is applied
      rk[12] = rk[ 4] ^ setup_mix(temp<<24|temp>>8);
      rk[13] = rk[ 5] ^ rk[12];
      rk[14] = rk[ 6] ^ rk[13];
      rk[15] = rk[ 7] ^ rk[14];
      rk += 8;
    }
  }
}
// Encrypt to ct[16]
// The 128 bit input block is given as four 32 bit words s0..s3. The result
// is written to ct[0..15], each output word stored most significant byte
// first. Uses the tables and expanded key built by the constructor.
void AES_CTR::encrypt(U32 s0, U32 s1, U32 s2, U32 s3, unsigned char* ct) {
  int r = Nr >> 1;  // each loop pass below covers up to two rounds
  U32 *rk = &ek[0];
  U32 t0=0, t1=0, t2=0, t3=0;
  // Initial round key addition
  s0 ^= rk[0];
  s1 ^= rk[1];
  s2 ^= rk[2];
  s3 ^= rk[3];
  for (;;) {
    // First half of the pass: s -> t using round key words rk[4..7]
    t0 =
      Te0[byte(s0, 3)] ^
      Te1[byte(s1, 2)] ^
      Te2[byte(s2, 1)] ^
      Te3[byte(s3, 0)] ^
      rk[4];
    t1 =
      Te0[byte(s1, 3)] ^
      Te1[byte(s2, 2)] ^
      Te2[byte(s3, 1)] ^
      Te3[byte(s0, 0)] ^
      rk[5];
    t2 =
      Te0[byte(s2, 3)] ^
      Te1[byte(s3, 2)] ^
      Te2[byte(s0, 1)] ^
      Te3[byte(s1, 0)] ^
      rk[6];
    t3 =
      Te0[byte(s3, 3)] ^
      Te1[byte(s0, 2)] ^
      Te2[byte(s1, 1)] ^
      Te3[byte(s2, 0)] ^
      rk[7];
    rk += 8;
    // On the last pass leave with the state in t0..t3; the final round
    // below is applied to it.
    if (--r == 0) {
      break;
    }
    // Second half of the pass: t -> s using the next round key words
    s0 =
      Te0[byte(t0, 3)] ^
      Te1[byte(t1, 2)] ^
      Te2[byte(t2, 1)] ^
      Te3[byte(t3, 0)] ^
      rk[0];
    s1 =
      Te0[byte(t1, 3)] ^
      Te1[byte(t2, 2)] ^
      Te2[byte(t3, 1)] ^
      Te3[byte(t0, 0)] ^
      rk[1];
    s2 =
      Te0[byte(t2, 3)] ^
      Te1[byte(t3, 2)] ^
      Te2[byte(t0, 1)] ^
      Te3[byte(t1, 0)] ^
      rk[2];
    s3 =
      Te0[byte(t3, 3)] ^
      Te1[byte(t0, 2)] ^
      Te2[byte(t1, 1)] ^
      Te3[byte(t2, 0)] ^
      rk[3];
  }
  // apply last round and map cipher state to byte array block:
  // (uses only the byte-lane masked Te4 table, not Te0..Te3)
  s0 =
    (Te4_3[byte(t0, 3)]) ^
    (Te4_2[byte(t1, 2)]) ^
    (Te4_1[byte(t2, 1)]) ^
    (Te4_0[byte(t3, 0)]) ^
    rk[0];
  STORE32H(s0, ct);
  s1 =
    (Te4_3[byte(t1, 3)]) ^
    (Te4_2[byte(t2, 2)]) ^
    (Te4_1[byte(t3, 1)]) ^
    (Te4_0[byte(t0, 0)]) ^
    rk[1];
  STORE32H(s1, ct+4);
  s2 =
    (Te4_3[byte(t2, 3)]) ^
    (Te4_2[byte(t3, 2)]) ^
    (Te4_1[byte(t0, 1)]) ^
    (Te4_0[byte(t1, 0)]) ^
    rk[2];
  STORE32H(s2, ct+8);
  s3 =
    (Te4_3[byte(t3, 3)]) ^
    (Te4_2[byte(t0, 2)]) ^
    (Te4_1[byte(t1, 1)]) ^
    (Te4_0[byte(t2, 0)]) ^
    rk[3];
  STORE32H(s3, ct+12);
}
// Encrypt or decrypt slice buf[0..n-1] at offset by XOR with AES(i) where
// i is the 128 bit big-endian distance from the start in 16 byte blocks.
void AES_CTR::encrypt(char* buf, int n, U64 offset) {
  for (U64 i=offset/16; i<=(offset+n)/16; ++i) {
    unsigned char ct[16];
    encrypt(iv0, iv1, i>>32, i, ct);
    for (int j=0; j<16; ++j) {
      const int k=(i*16-offset)+j;
      if (k>=0 && k<n)
        buf[k]^=ct[j];
    }
  }
}
#undef setup_mix
#undef Te4_3
#undef Te4_2
#undef Te4_1
#undef Te4_0
//////////////////////////// stretchKey //////////////////////
// PBKDF2(pw[0..pwlen], salt[0..saltlen], c) to buf[0..dkLen-1]
// using HMAC-SHA256, for the special case of c = 1 iterations
// output size dkLen a multiple of 32, and pwLen <= 64.
static void pbkdf2(const char* pw, int pwLen, const char* salt, int saltLen,
                    char* buf, int dkLen) {
  assert(dkLen%32==0);
  assert(pwLen<=64);
  libzpaq::SHA256 sha256;
  char b[32];
  for (int i=1; i*32<=dkLen; ++i) {
    for (int j=0; j<pwLen; ++j) sha256.put(pw[j]^0x36);
    for (int j=pwLen; j<64; ++j) sha256.put(0x36);
    for (int j=0; j<saltLen; ++j) sha256.put(salt[j]);
    for (int j=24; j>=0; j-=8) sha256.put(i>>j);
    memcpy(b, sha256.result(), 32);
    for (int j=0; j<pwLen; ++j) sha256.put(pw[j]^0x5c);
    for (int j=pwLen; j<64; ++j) sha256.put(0x5c);
    for (int j=0; j<32; ++j) sha256.put(b[j]);
    memcpy(buf+i*32-32, sha256.result(), 32);
  }
}
/// LICENSE_END.1

/// LICENSE_START.2
// Hash b[0..15] using 8 rounds of salsa20
// Modified from http://cr.yp.to/salsa20.html (public domain) to 8 rounds
// b is updated in place: the mixed copy x is added word by word to the
// original input at the end.
static void salsa8(U32* b) {
  unsigned x[16]={0};
  memcpy(x, b, 64);  // work on a copy; b is still needed for the final add
  // 4 passes, each made of two groups of 16 add-rotate-xor steps
  for (unsigned int i=0; i<4; ++i) {
    // R(a,b): rotate the 32 bit value a left by b bits
    #define R(a,b) (((a)<<(b))+((a)>>(32-b)))
    x[ 4] ^= R(x[ 0]+x[12], 7);  x[ 8] ^= R(x[ 4]+x[ 0], 9);
    x[12] ^= R(x[ 8]+x[ 4],13);  x[ 0] ^= R(x[12]+x[ 8],18);
    x[ 9] ^= R(x[ 5]+x[ 1], 7);  x[13] ^= R(x[ 9]+x[ 5], 9);
    x[ 1] ^= R(x[13]+x[ 9],13);  x[ 5] ^= R(x[ 1]+x[13],18);
    x[14] ^= R(x[10]+x[ 6], 7);  x[ 2] ^= R(x[14]+x[10], 9);
    x[ 6] ^= R(x[ 2]+x[14],13);  x[10] ^= R(x[ 6]+x[ 2],18);
    x[ 3] ^= R(x[15]+x[11], 7);  x[ 7] ^= R(x[ 3]+x[15], 9);
    x[11] ^= R(x[ 7]+x[ 3],13);  x[15] ^= R(x[11]+x[ 7],18);
    x[ 1] ^= R(x[ 0]+x[ 3], 7);  x[ 2] ^= R(x[ 1]+x[ 0], 9);
    x[ 3] ^= R(x[ 2]+x[ 1],13);  x[ 0] ^= R(x[ 3]+x[ 2],18);
    x[ 6] ^= R(x[ 5]+x[ 4], 7);  x[ 7] ^= R(x[ 6]+x[ 5], 9);
    x[ 4] ^= R(x[ 7]+x[ 6],13);  x[ 5] ^= R(x[ 4]+x[ 7],18);
    x[11] ^= R(x[10]+x[ 9], 7);  x[ 8] ^= R(x[11]+x[10], 9);
    x[ 9] ^= R(x[ 8]+x[11],13);  x[10] ^= R(x[ 9]+x[ 8],18);
    x[12] ^= R(x[15]+x[14], 7);  x[13] ^= R(x[12]+x[15], 9);
    x[14] ^= R(x[13]+x[12],13);  x[15] ^= R(x[14]+x[13],18);
    #undef R
  }
  for (unsigned int i=0; i<16; ++i) b[i]+=x[i];
}
// BlockMix_{Salsa20/8, r}: mix the 2*r 64-byte sub-blocks of b in place.
// b holds 32*r U32 words (128*r bytes); r must be at most 8 so that the
// temporary y[256] is large enough.
static void blockmix(U32* b, int r) {
  assert(r<=8);
  const int nsub=2*r;  // number of 16 word sub-blocks in b
  U32 x[16];
  U32 y[256];

  // Chain through the sub-blocks, starting from the last one.
  memcpy(x, b+(nsub-1)*16, 64);
  for (int sub=0; sub<nsub; ++sub) {
    const U32* src=b+sub*16;
    for (int w=0; w<16; ++w)
      x[w]^=src[w];
    salsa8(x);
    memcpy(y+sub*16, x, 64);
  }

  // Write back: even numbered results fill the first half of b, odd
  // numbered results fill the second half.
  for (int i=0; i<r; ++i) {
    memcpy(b+i*16, y+i*32, 64);
    memcpy(b+(i+r)*16, y+i*32+16, 64);
  }
}
// Mix b[0..128*r-1]. Uses 128*r*n bytes of memory and O(r*n) time
static void smix(char* b, int r, int n) {
  libzpaq::Array<U32> x(32*r), v(32*r*n);
  for (int i=0; i<r*128; ++i) x[i/4]+=(b[i]&255)<<i%4*8;
  for (int i=0; i<n; ++i) {
    memcpy(&v[i*r*32], &x[0], r*128);
    blockmix(&x[0], r);
  }
  for (int i=0; i<n; ++i) {
    U32 j=x[(2*r-1)*16]&(n-1);
    for (int k=0; k<r*32; ++k) x[k]^=v[j*r*32+k];
    blockmix(&x[0], r);
  }
  for (int i=0; i<r*128; ++i) b[i]=x[i/4]>>(i%4*8);
}
// Derive key buf[0..buflen-1] from password pw[0..pwlen-1] and
// salt[0..saltlen-1]. Uses O(n*r*p) time and 128*r*n bytes of memory.
// n must be a power of 2 and r <= 8. pwlen and buflen are additionally
// subject to the limits of pbkdf2() (pwlen <= 64, buflen a multiple of 32).
void scrypt(const char* pw, int pwlen,
            const char* salt, int saltlen,
            int n, int r, int p, char* buf, int buflen) {
  assert(r<=8);
  assert(n>0 && (n&(n-1))==0);  // power of 2?

  const int lanelen=r*128;    // bytes mixed by one smix() call
  const int total=p*lanelen;  // bytes for all p lanes
  libzpaq::Array<char> b(total);

  // Expand the password into p lanes, mix each lane independently, then
  // compress the mixed lanes (used as the salt) into the final key.
  pbkdf2(pw, pwlen, salt, saltlen,  &b[0], total);
  for (int lane=0; lane<p; ++lane)
    smix(&b[lane*lanelen], r, n);
  pbkdf2(pw, pwlen, &b[0], total,  buf, buflen);
}
// Stretch key in[0..31], assumed to be SHA256(password), with
// salt[0..31] to produce new key out[0..31]. Exactly 32 bytes of salt are
// read, so the salt need not be NUL terminated.
// Fixed scrypt parameters: n = 2^14, r = 8, p = 1.
void stretchKey(char* out, const char* in, const char* salt) {
  const int keylen=32;
  const int saltlen=32;
  const int cost_n=1<<14;
  const int cost_r=8;
  const int cost_p=1;
  scrypt(in, keylen, salt, saltlen, cost_n, cost_r, cost_p, out, keylen);
}
/// LICENSE_END.2



//////////////////////////// random //////////////////////////
// Put n cryptographic random bytes in buf[0..n-1].
// The first byte will not be 'z' or '7' (start of a ZPAQ archive).
// For a pure random number, discard the first byte.
// In VC++, must link to advapi32.lib.
// Calls error("key generation failed") if the system source cannot supply
// n bytes. The OS handle (file or crypto context) is released on every
// path before error() is called, so a failure does not leak it.
// If the global flagsalt is set, the bytes are overwritten with zeros after
// a warning is printed.
void random(char* buf, int n) {
#ifdef unix
  FILE* in=fopen("/dev/urandom", "rb");
  const bool filled=in && int(fread(buf, 1, n, in))==n;
  if (in) fclose(in);  // close on success and on a short read alike
  if (!filled) {
    error("key generation failed");
  }
#else
  HCRYPTPROV h;
  bool filled=false;
  int lasterr=0;
  if (CryptAcquireContext(&h, NULL, NULL, PROV_RSA_FULL,
      CRYPT_VERIFYCONTEXT)) {
    filled=CryptGenRandom(h, n, (BYTE*)buf)!=0;
    // Capture the error code before releasing the context, which may
    // change the thread's last-error value.
    if (!filled) lasterr=int(GetLastError());
    CryptReleaseContext(h, 0);
  }
  else
    lasterr=int(GetLastError());
  if (!filled) {
    printf("CryptGenRandom: error %d\n", lasterr);
    error("key generation failed");
  }
#endif
	if (flagsalt)
	{
		// Deliberately discards the random bytes: every byte becomes 0.
		color_yellow();
		bigwarning();
		myprintf("10528! ENFORCING ZERO SALT!\n");
		memset(buf,0,n);
	}
  // Flip the top bit of a leading 'z' or '7' so that the output can never
  // be mistaken for the start of a ZPAQ archive.
  if (n>=1 && (buf[0]=='z' || buf[0]=='7'))
    buf[0]^=0x80;
}
//////////////////////////// Component ///////////////////////
// A Component is a context model, indirect context model, match model,
// fixed weight mixer, adaptive 2 input mixer without or with current
// partial byte as context, adaptive m input mixer (without or with),
// or SSE (without or with).
// compsize[t] is the number of bytes a component of type t occupies in the
// COMP section, counting the type byte itself. Only entries 1..9 are
// nonzero (presumably CONS..SSE in the order of Compiler::CompType --
// confirm); a size below 1 is rejected by ZPAQL::read() as an invalid
// component type.
const int compsize[256]={0,2,3,2,3,4,6,6,3,5};
// Return the component to its empty state: every scalar field is zeroed
// and the three arrays are resized to length 0.
void Component::init() {
  limit=0;
  cxt=0;
  a=0;
  b=0;
  c=0;
  cm.resize(0);
  ht.resize(0);
  a16.resize(0);
}
////////////////////////// StateTable ////////////////////////
// sns[i*4..i*4+3] describes state i: next state if the bit is 0,
// next state if the bit is 1, n0, n1
static const U8 sns[1024]={
     1,     2,     0,     0,     3,     5,     1,     0,
     4,     6,     0,     1,     7,     9,     2,     0,
     8,    11,     1,     1,     8,    11,     1,     1,
    10,    12,     0,     2,    13,    15,     3,     0,
    14,    17,     2,     1,    14,    17,     2,     1,
    16,    19,     1,     2,    16,    19,     1,     2,
    18,    20,     0,     3,    21,    23,     4,     0,
    22,    25,     3,     1,    22,    25,     3,     1,
    24,    27,     2,     2,    24,    27,     2,     2,
    26,    29,     1,     3,    26,    29,     1,     3,
    28,    30,     0,     4,    31,    33,     5,     0,
    32,    35,     4,     1,    32,    35,     4,     1,
    34,    37,     3,     2,    34,    37,     3,     2,
    36,    39,     2,     3,    36,    39,     2,     3,
    38,    41,     1,     4,    38,    41,     1,     4,
    40,    42,     0,     5,    43,    33,     6,     0,
    44,    47,     5,     1,    44,    47,     5,     1,
    46,    49,     4,     2,    46,    49,     4,     2,
    48,    51,     3,     3,    48,    51,     3,     3,
    50,    53,     2,     4,    50,    53,     2,     4,
    52,    55,     1,     5,    52,    55,     1,     5,
    40,    56,     0,     6,    57,    45,     7,     0,
    58,    47,     6,     1,    58,    47,     6,     1,
    60,    63,     5,     2,    60,    63,     5,     2,
    62,    65,     4,     3,    62,    65,     4,     3,
    64,    67,     3,     4,    64,    67,     3,     4,
    66,    69,     2,     5,    66,    69,     2,     5,
    52,    71,     1,     6,    52,    71,     1,     6,
    54,    72,     0,     7,    73,    59,     8,     0,
    74,    61,     7,     1,    74,    61,     7,     1,
    76,    63,     6,     2,    76,    63,     6,     2,
    78,    81,     5,     3,    78,    81,     5,     3,
    80,    83,     4,     4,    80,    83,     4,     4,
    82,    85,     3,     5,    82,    85,     3,     5,
    66,    87,     2,     6,    66,    87,     2,     6,
    68,    89,     1,     7,    68,    89,     1,     7,
    70,    90,     0,     8,    91,    59,     9,     0,
    92,    77,     8,     1,    92,    77,     8,     1,
    94,    79,     7,     2,    94,    79,     7,     2,
    96,    81,     6,     3,    96,    81,     6,     3,
    98,   101,     5,     4,    98,   101,     5,     4,
   100,   103,     4,     5,   100,   103,     4,     5,
    82,   105,     3,     6,    82,   105,     3,     6,
    84,   107,     2,     7,    84,   107,     2,     7,
    86,   109,     1,     8,    86,   109,     1,     8,
    70,   110,     0,     9,   111,    59,    10,     0,
   112,    77,     9,     1,   112,    77,     9,     1,
   114,    97,     8,     2,   114,    97,     8,     2,
   116,    99,     7,     3,   116,    99,     7,     3,
    62,   101,     6,     4,    62,   101,     6,     4,
    80,    83,     5,     5,    80,    83,     5,     5,
   100,    67,     4,     6,   100,    67,     4,     6,
   102,   119,     3,     7,   102,   119,     3,     7,
   104,   121,     2,     8,   104,   121,     2,     8,
    86,   123,     1,     9,    86,   123,     1,     9,
    70,   124,     0,    10,   125,    59,    11,     0,
   126,    77,    10,     1,   126,    77,    10,     1,
   128,    97,     9,     2,   128,    97,     9,     2,
    60,    63,     8,     3,    60,    63,     8,     3,
    66,    69,     3,     8,    66,    69,     3,     8,
   104,   131,     2,     9,   104,   131,     2,     9,
    86,   133,     1,    10,    86,   133,     1,    10,
    70,   134,     0,    11,   135,    59,    12,     0,
   136,    77,    11,     1,   136,    77,    11,     1,
   138,    97,    10,     2,   138,    97,    10,     2,
   104,   141,     2,    10,   104,   141,     2,    10,
    86,   143,     1,    11,    86,   143,     1,    11,
    70,   144,     0,    12,   145,    59,    13,     0,
   146,    77,    12,     1,   146,    77,    12,     1,
   148,    97,    11,     2,   148,    97,    11,     2,
   104,   151,     2,    11,   104,   151,     2,    11,
    86,   153,     1,    12,    86,   153,     1,    12,
    70,   154,     0,    13,   155,    59,    14,     0,
   156,    77,    13,     1,   156,    77,    13,     1,
   158,    97,    12,     2,   158,    97,    12,     2,
   104,   161,     2,    12,   104,   161,     2,    12,
    86,   163,     1,    13,    86,   163,     1,    13,
    70,   164,     0,    14,   165,    59,    15,     0,
   166,    77,    14,     1,   166,    77,    14,     1,
   168,    97,    13,     2,   168,    97,    13,     2,
   104,   171,     2,    13,   104,   171,     2,    13,
    86,   173,     1,    14,    86,   173,     1,    14,
    70,   174,     0,    15,   175,    59,    16,     0,
   176,    77,    15,     1,   176,    77,    15,     1,
   178,    97,    14,     2,   178,    97,    14,     2,
   104,   181,     2,    14,   104,   181,     2,    14,
    86,   183,     1,    15,    86,   183,     1,    15,
    70,   184,     0,    16,   185,    59,    17,     0,
   186,    77,    16,     1,   186,    77,    16,     1,
    74,    97,    15,     2,    74,    97,    15,     2,
   104,    89,     2,    15,   104,    89,     2,    15,
    86,   187,     1,    16,    86,   187,     1,    16,
    70,   188,     0,    17,   189,    59,    18,     0,
   190,    77,    17,     1,    86,   191,     1,    17,
    70,   192,     0,    18,   193,    59,    19,     0,
   194,    77,    18,     1,    86,   195,     1,    18,
    70,   196,     0,    19,   193,    59,    20,     0,
   197,    77,    19,     1,    86,   198,     1,    19,
    70,   196,     0,    20,   199,    77,    20,     1,
    86,   200,     1,    20,   201,    77,    21,     1,
    86,   202,     1,    21,   203,    77,    22,     1,
    86,   204,     1,    22,   205,    77,    23,     1,
    86,   206,     1,    23,   207,    77,    24,     1,
    86,   208,     1,    24,   209,    77,    25,     1,
    86,   210,     1,    25,   211,    77,    26,     1,
    86,   212,     1,    26,   213,    77,    27,     1,
    86,   214,     1,    27,   215,    77,    28,     1,
    86,   216,     1,    28,   217,    77,    29,     1,
    86,   218,     1,    29,   219,    77,    30,     1,
    86,   220,     1,    30,   221,    77,    31,     1,
    86,   222,     1,    31,   223,    77,    32,     1,
    86,   224,     1,    32,   225,    77,    33,     1,
    86,   226,     1,    33,   227,    77,    34,     1,
    86,   228,     1,    34,   229,    77,    35,     1,
    86,   230,     1,    35,   231,    77,    36,     1,
    86,   232,     1,    36,   233,    77,    37,     1,
    86,   234,     1,    37,   235,    77,    38,     1,
    86,   236,     1,    38,   237,    77,    39,     1,
    86,   238,     1,    39,   239,    77,    40,     1,
    86,   240,     1,    40,   241,    77,    41,     1,
    86,   242,     1,    41,   243,    77,    42,     1,
    86,   244,     1,    42,   245,    77,    43,     1,
    86,   246,     1,    43,   247,    77,    44,     1,
    86,   248,     1,    44,   249,    77,    45,     1,
    86,   250,     1,    45,   251,    77,    46,     1,
    86,   252,     1,    46,   253,    77,    47,     1,
    86,   254,     1,    47,   253,    77,    48,     1,
    86,   254,     1,    48,     0,     0,     0,     0
};
// Initialize next state table ns[state*4] -> next if 0, next if 1, n0, n1
// The table is a byte-for-byte copy of the static sns array; sizeof(ns)
// bytes are copied, so ns must not be larger than sns.
StateTable::StateTable() {
  memcpy(ns, sns, sizeof(ns));
}
/////////////////////////// ZPAQL //////////////////////////
// Serialize the stored header to out2.
// Returns false, writing nothing, when no program is loaded (header of 6
// bytes or fewer); otherwise returns true.
// pp == false: write header[0..cend-1] (sizes and COMP) followed by the
//              code in header[hbegin..hend-1].
// pp == true:  write only the code length as 2 bytes, low byte first,
//              followed by the code itself.
bool ZPAQL::write(Writer* out2, bool pp) {
  if (header.size()<=6) return false;
  assert(header[0]+256*header[1]==(cend-2)+hend-hbegin);
  assert(cend>=7);
  assert(hbegin>=cend);
  assert(hend>=hbegin);
  assert(out2);

  if (pp) {  // postprocessor: PCOMP size only
    out2->put((hend-hbegin)&255);
    out2->put((hend-hbegin)>>8);
  }
  else {  // not a postprocessor: COMP section first
    int pos=0;
    while (pos<cend) {
      out2->put(header[pos]);
      ++pos;
    }
  }

  // The code section is written in both modes.
  int pos=hbegin;
  while (pos<hend) {
    out2->put(header[pos]);
    ++pos;
  }
  return true;
}
// Read header from in2
// Layout stored in header: the 2 byte little-endian size hsize, then
// hh hm ph pm n, then n components, a 0 terminator, a 128 byte guard gap,
// the HCOMP code and a final 0 terminator.
// Returns cend+hend-hbegin, i.e. the number of header bytes stored not
// counting the gap. Calls error() on truncated or malformed input.
// Any previously generated JIT code is discarded.
int ZPAQL::read(Reader* in2) {
  // Get header size and allocate
  int hsize=in2->get();
  hsize+=in2->get()*256;
  header.resize(hsize+300);  // headroom for the guard gap and terminators
  cend=hbegin=hend=0;
  header[cend++]=hsize&255;
  header[cend++]=hsize>>8;
  while (cend<7) header[cend++]=in2->get(); // hh hm ph pm n
  // Read COMP
  int n=header[cend-1];  // number of components
  for (int i=0; i<n; ++i) {
    int type=in2->get();  // component type
    if (type<0 || type>255) error("unexpected end of file");
    header[cend++]=type;  // component type
    int size=compsize[type];  // total bytes for this component, type included
    if (size<1) error("Invalid component type");
    if (cend+size>hsize) error("COMP overflows header");
    for (int j=1; j<size; ++j)  // remaining size-1 argument bytes
      header[cend++]=in2->get();
  }
  if ((header[cend++]=in2->get())!=0) error("missing COMP END");
  // Insert a guard gap and read HCOMP
  hbegin=hend=cend+128;
  if (hend>hsize+129) error("missing HCOMP");
  // Read until exactly hsize+129 bytes are accounted for, gap included
  while (hend<hsize+129) {
    assert(hend<header.isize()-8);
    int op=in2->get();
    if (op==-1) error("unexpected end of file");
    header[hend++]=op;
  }
  if ((header[hend++]=in2->get())!=0) error("missing HCOMP END");
  assert(cend>=7 && cend<header.isize());
  assert(hbegin==cend+128 && hbegin<header.isize());
  assert(hend>hbegin && hend<header.isize());
  assert(hsize==header[0]+256*header[1]);
  assert(hsize==cend-2+hend-hbegin);
  allocx(rcode, rcode_size, 0);  // clear JIT code
  return cend+hend-hbegin;
}
// Return to the empty state: no program loaded, registers zeroed, all
// arrays shrunk to zero elements. The output and sha1 pointers are kept.
void ZPAQL::clear() {
  // COMP and HCOMP locations
  cend=0;
  hbegin=0;
  hend=0;
  // Machine state
  a=0;
  b=0;
  c=0;
  d=0;
  f=0;
  pc=0;
  // Storage
  header.resize(0);
  h.resize(0);
  m.resize(0);
  r.resize(0);
  allocx(rcode, rcode_size, 0);  // JIT code
}
// Constructor: no output or sha1 destination, no JIT code, empty program
// (via clear()), and an output buffer of 16384 bytes with nothing pending.
ZPAQL::ZPAQL() {
  // These must be set before clear(), which passes them to allocx()
  rcode_size=0;
  rcode=0;
  sha1=0;
  output=0;
  bufptr=0;
  clear();
  outbuf.resize(16384);  // 1<<14
}
// Destructor: calls allocx() with a new size of 0 on rcode (the JIT code
// buffer), presumably releasing it -- allocx() is defined elsewhere.
ZPAQL::~ZPAQL() {
  allocx(rcode, rcode_size, 0);
}
// Prepare the machine to run as HCOMP: init() is called with header
// bytes 2 (hh) and 3 (hm). Asserts that a header is loaded and that no
// output or sha1 destination is attached.
void ZPAQL::inith() {
  assert(header.isize()>6);
  assert(output==0);
  assert(sha1==0);
  const int hh=header[2];
  const int hm=header[3];
  init(hh, hm);
}
// Prepare the machine to run as PCOMP: init() is called with header
// bytes 4 (ph) and 5 (pm). Asserts that a header is loaded.
void ZPAQL::initp() {
  assert(header.isize()>6);
  const int ph=header[4];
  const int pm=header[5];
  init(ph, pm);
}
// Hand the bufptr pending bytes of outbuf to output and to sha1 (each
// only if set), then mark the buffer empty.
void ZPAQL::flush() {
  if (output!=0) {
    output->write(&outbuf[0], bufptr);
  }
  if (sha1!=0) {
    sha1->write(&outbuf[0], bufptr);
  }
  bufptr=0;
}
// Return 2 raised to the power x as a double, by repeated doubling.
// For x <= 0 the result is 1.
static double pow2(int x) {
  double result=1.0;
  while (x>0) {
    result*=2.0;
    --x;
  }
  return result;
}
// Return the memory requirement in bytes for the loaded header:
// 2^(hh+2) + 2^hm + 2^(ph+2) + 2^pm + the size of header[], plus a
// per-component amount that depends on the component type and on its
// sizebits argument (the byte following the type).
double ZPAQL::memory() {
  double total=pow2(header[2]+2);  // hh
  total+=pow2(header[3]);          // hm
  total+=pow2(header[4]+2);        // ph
  total+=pow2(header[5]);          // pm
  total+=header.size();
  const unsigned int ncomp=header[6];  // n
  int pos=7;  // start of comp list
  for (unsigned int k=0; k<ncomp; ++k) {
    assert(pos<cend);
    const int ctype=header[pos];
    const double slots=pow2(header[pos+1]);  // 2^sizebits
    if (ctype==CM) {
      total+=4*slots;
    }
    else if (ctype==ICM) {
      total+=64*slots+1024;
    }
    else if (ctype==MATCH) {
      total+=4*slots+pow2(header[pos+2]);  // bufbits
    }
    else if (ctype==MIX2) {
      total+=2*slots;
    }
    else if (ctype==MIX) {
      total+=4*slots*header[pos+3];  // m
    }
    else if (ctype==ISSE) {
      total+=64*slots+2048;
    }
    else if (ctype==SSE) {
      total+=128*slots;
    }
    // other component types add nothing
    pos+=compsize[ctype];  // step to the next descriptor
  }
  return total;
}
// Initialize machine state to run a program.
void ZPAQL::init(int hbits, int mbits) {
  assert(header.isize()>0);
  assert(cend>=7);
  assert(hbegin>=cend+128);
  assert(hend>=hbegin);
  assert(hend<header.isize()-130);
  assert(header[0]+256*header[1]==(cend-2)+hend-hbegin);
  assert(bufptr==0);
  assert(outbuf.isize()>0);
  if (hbits>32) error("H too big");
  if (mbits>32) error("M too big");
  h.resize(1, hbits);
  m.resize(1, mbits);
  r.resize(256);
  a=b=c=d=pc=f=0;
}

// Interpret the program stored in header: load input into A, start at
// hbegin and execute instructions until execute() returns 0.
void ZPAQL::run0(U32 input) {
  assert(cend>6);
  assert(hbegin>=cend+128);
  assert(hend>=hbegin);
  assert(hend<header.isize()-130);
  assert(m.size()>0);
  assert(h.size()>0);
  assert(header[0]+256*header[1]==cend+hend-hbegin-2);
  a=input;
  pc=hbegin;
  for (;;) {
    if (!execute()) break;
  }
}

// Execute the single instruction at header[pc], advancing pc past it.
// Returns 0 when the opcode is HALT (56), otherwise 1.
//
// The opcode byte selects the case; the trailing comment on each case is
// the instruction mnemonic. Cases that read header[pc++] a second time
// take a one-byte operand N stored right after the opcode.
// Opcode 0 and every opcode that has no case (the default branch) call
// err().
//
// Jumps: JT (39), JF (47) and JMP (63) add ((N+128)&255)-127 to pc while
// pc points at the operand, i.e. N read as a signed byte plus 1 to step
// over the operand itself. JT jumps when f is nonzero, JF when f is zero;
// when not taken they just skip the operand. LJ (255) sets pc to hbegin
// plus a 16-bit operand (low byte first) and calls err() if the result is
// not below hend.
//
// a, b, c, d (registers), f (flag), pc and r[] are members. m(), h(),
// swap(), zdiv(), mod() and outc() are defined outside this block; per
// the mnemonics, m(b)/m(c) are *B/*C and h(d) is *D.
int ZPAQL::execute() {
  switch(header[pc++]) {
    case 0: err(); break; // ERROR
    // Opcodes 1-55: unary operations on A, B, C, D, *B, *C, *D, plus
    // register file access (A=R, B=R, C=R, D=R, R=A) and JT/JF
    case 1: ++a; break; // A++
    case 2: --a; break; // A--
    case 3: a = ~a; break; // A!
    case 4: a = 0; break; // A=0
    case 7: a = r[header[pc++]]; break; // A=R N
    case 8: swap(b); break; // B<>A
    case 9: ++b; break; // B++
    case 10: --b; break; // B--
    case 11: b = ~b; break; // B!
    case 12: b = 0; break; // B=0
    case 15: b = r[header[pc++]]; break; // B=R N
    case 16: swap(c); break; // C<>A
    case 17: ++c; break; // C++
    case 18: --c; break; // C--
    case 19: c = ~c; break; // C!
    case 20: c = 0; break; // C=0
    case 23: c = r[header[pc++]]; break; // C=R N
    case 24: swap(d); break; // D<>A
    case 25: ++d; break; // D++
    case 26: --d; break; // D--
    case 27: d = ~d; break; // D!
    case 28: d = 0; break; // D=0
    case 31: d = r[header[pc++]]; break; // D=R N
    case 32: swap(m(b)); break; // *B<>A
    case 33: ++m(b); break; // *B++
    case 34: --m(b); break; // *B--
    case 35: m(b) = ~m(b); break; // *B!
    case 36: m(b) = 0; break; // *B=0
    case 39: if (f) pc+=((header[pc]+128)&255)-127; else ++pc; break; // JT N
    case 40: swap(m(c)); break; // *C<>A
    case 41: ++m(c); break; // *C++
    case 42: --m(c); break; // *C--
    case 43: m(c) = ~m(c); break; // *C!
    case 44: m(c) = 0; break; // *C=0
    case 47: if (!f) pc+=((header[pc]+128)&255)-127; else ++pc; break; // JF N
    case 48: swap(h(d)); break; // *D<>A
    case 49: ++h(d); break; // *D++
    case 50: --h(d); break; // *D--
    case 51: h(d) = ~h(d); break; // *D!
    case 52: h(d) = 0; break; // *D=0
    case 55: r[header[pc++]] = a; break; // R=A N
    // Opcodes 56-63: HALT, OUT, HASH, HASHD, JMP
    case 56: return 0  ; // HALT
    case 57: outc(a&255); break; // OUT
    case 59: a = (a+m(b)+512)*773; break; // HASH
    case 60: h(d) = (h(d)+a+512)*773; break; // HASHD
    case 63: pc+=((header[pc]+128)&255)-127; break; // JMP N
    // Opcodes 64-119: assignments; self-assignments are no-ops
    case 64: break; // A=A
    case 65: a = b; break; // A=B
    case 66: a = c; break; // A=C
    case 67: a = d; break; // A=D
    case 68: a = m(b); break; // A=*B
    case 69: a = m(c); break; // A=*C
    case 70: a = h(d); break; // A=*D
    case 71: a = header[pc++]; break; // A= N
    case 72: b = a; break; // B=A
    case 73: break; // B=B
    case 74: b = c; break; // B=C
    case 75: b = d; break; // B=D
    case 76: b = m(b); break; // B=*B
    case 77: b = m(c); break; // B=*C
    case 78: b = h(d); break; // B=*D
    case 79: b = header[pc++]; break; // B= N
    case 80: c = a; break; // C=A
    case 81: c = b; break; // C=B
    case 82: break; // C=C
    case 83: c = d; break; // C=D
    case 84: c = m(b); break; // C=*B
    case 85: c = m(c); break; // C=*C
    case 86: c = h(d); break; // C=*D
    case 87: c = header[pc++]; break; // C= N
    case 88: d = a; break; // D=A
    case 89: d = b; break; // D=B
    case 90: d = c; break; // D=C
    case 91: break; // D=D
    case 92: d = m(b); break; // D=*B
    case 93: d = m(c); break; // D=*C
    case 94: d = h(d); break; // D=*D
    case 95: d = header[pc++]; break; // D= N
    case 96: m(b) = a; break; // *B=A
    case 97: m(b) = b; break; // *B=B
    case 98: m(b) = c; break; // *B=C
    case 99: m(b) = d; break; // *B=D
    case 100: break; // *B=*B
    case 101: m(b) = m(c); break; // *B=*C
    case 102: m(b) = h(d); break; // *B=*D
    case 103: m(b) = header[pc++]; break; // *B= N
    case 104: m(c) = a; break; // *C=A
    case 105: m(c) = b; break; // *C=B
    case 106: m(c) = c; break; // *C=C
    case 107: m(c) = d; break; // *C=D
    case 108: m(c) = m(b); break; // *C=*B
    case 109: break; // *C=*C
    case 110: m(c) = h(d); break; // *C=*D
    case 111: m(c) = header[pc++]; break; // *C= N
    case 112: h(d) = a; break; // *D=A
    case 113: h(d) = b; break; // *D=B
    case 114: h(d) = c; break; // *D=C
    case 115: h(d) = d; break; // *D=D
    case 116: h(d) = m(b); break; // *D=*B
    case 117: h(d) = m(c); break; // *D=*C
    case 118: break; // *D=*D
    case 119: h(d) = header[pc++]; break; // *D= N
    // Opcodes 128-215: arithmetic, bitwise and shift operations on A;
    // division and modulo go through zdiv() and mod()
    case 128: a += a; break; // A+=A
    case 129: a += b; break; // A+=B
    case 130: a += c; break; // A+=C
    case 131: a += d; break; // A+=D
    case 132: a += m(b); break; // A+=*B
    case 133: a += m(c); break; // A+=*C
    case 134: a += h(d); break; // A+=*D
    case 135: a += header[pc++]; break; // A+= N
    case 136: a -= a; break; // A-=A
    case 137: a -= b; break; // A-=B
    case 138: a -= c; break; // A-=C
    case 139: a -= d; break; // A-=D
    case 140: a -= m(b); break; // A-=*B
    case 141: a -= m(c); break; // A-=*C
    case 142: a -= h(d); break; // A-=*D
    case 143: a -= header[pc++]; break; // A-= N
    case 144: a *= a; break; // A*=A
    case 145: a *= b; break; // A*=B
    case 146: a *= c; break; // A*=C
    case 147: a *= d; break; // A*=D
    case 148: a *= m(b); break; // A*=*B
    case 149: a *= m(c); break; // A*=*C
    case 150: a *= h(d); break; // A*=*D
    case 151: a *= header[pc++]; break; // A*= N
    case 152: zdiv(a); break; // A/=A
    case 153: zdiv(b); break; // A/=B
    case 154: zdiv(c); break; // A/=C
    case 155: zdiv(d); break; // A/=D
    case 156: zdiv(m(b)); break; // A/=*B
    case 157: zdiv(m(c)); break; // A/=*C
    case 158: zdiv(h(d)); break; // A/=*D
    case 159: zdiv(header[pc++]); break; // A/= N
    case 160: mod(a); break; // A%=A
    case 161: mod(b); break; // A%=B
    case 162: mod(c); break; // A%=C
    case 163: mod(d); break; // A%=D
    case 164: mod(m(b)); break; // A%=*B
    case 165: mod(m(c)); break; // A%=*C
    case 166: mod(h(d)); break; // A%=*D
    case 167: mod(header[pc++]); break; // A%= N
    case 168: a &= a; break; // A&=A
    case 169: a &= b; break; // A&=B
    case 170: a &= c; break; // A&=C
    case 171: a &= d; break; // A&=D
    case 172: a &= m(b); break; // A&=*B
    case 173: a &= m(c); break; // A&=*C
    case 174: a &= h(d); break; // A&=*D
    case 175: a &= header[pc++]; break; // A&= N
    case 176: a &= ~ a; break; // A&~A
    case 177: a &= ~ b; break; // A&~B
    case 178: a &= ~ c; break; // A&~C
    case 179: a &= ~ d; break; // A&~D
    case 180: a &= ~ m(b); break; // A&~*B
    case 181: a &= ~ m(c); break; // A&~*C
    case 182: a &= ~ h(d); break; // A&~*D
    case 183: a &= ~ header[pc++]; break; // A&~ N
    case 184: a |= a; break; // A|=A
    case 185: a |= b; break; // A|=B
    case 186: a |= c; break; // A|=C
    case 187: a |= d; break; // A|=D
    case 188: a |= m(b); break; // A|=*B
    case 189: a |= m(c); break; // A|=*C
    case 190: a |= h(d); break; // A|=*D
    case 191: a |= header[pc++]; break; // A|= N
    case 192: a ^= a; break; // A^=A
    case 193: a ^= b; break; // A^=B
    case 194: a ^= c; break; // A^=C
    case 195: a ^= d; break; // A^=D
    case 196: a ^= m(b); break; // A^=*B
    case 197: a ^= m(c); break; // A^=*C
    case 198: a ^= h(d); break; // A^=*D
    case 199: a ^= header[pc++]; break; // A^= N
    // Shift counts are masked with 31 before shifting
    case 200: a <<= (a&31); break; // A<<=A
    case 201: a <<= (b&31); break; // A<<=B
    case 202: a <<= (c&31); break; // A<<=C
    case 203: a <<= (d&31); break; // A<<=D
    case 204: a <<= (m(b)&31); break; // A<<=*B
    case 205: a <<= (m(c)&31); break; // A<<=*C
    case 206: a <<= (h(d)&31); break; // A<<=*D
    case 207: a <<= (header[pc++]&31); break; // A<<= N
    case 208: a >>= (a&31); break; // A>>=A
    case 209: a >>= (b&31); break; // A>>=B
    case 210: a >>= (c&31); break; // A>>=C
    case 211: a >>= (d&31); break; // A>>=D
    case 212: a >>= (m(b)&31); break; // A>>=*B
    case 213: a >>= (m(c)&31); break; // A>>=*C
    case 214: a >>= (h(d)&31); break; // A>>=*D
    case 215: a >>= (header[pc++]&31); break; // A>>= N
    // Opcodes 216-239: comparisons of A; the result is stored in flag f
    case 216: f = 1; break; // A==A
    case 217: f = (a == b); break; // A==B
    case 218: f = (a == c); break; // A==C
    case 219: f = (a == d); break; // A==D
    case 220: f = (a == U32(m(b))); break; // A==*B
    case 221: f = (a == U32(m(c))); break; // A==*C
    case 222: f = (a == h(d)); break; // A==*D
    case 223: f = (a == U32(header[pc++])); break; // A== N
    case 224: f = 0; break; // A<A
    case 225: f = (a < b); break; // A<B
    case 226: f = (a < c); break; // A<C
    case 227: f = (a < d); break; // A<D
    case 228: f = (a < U32(m(b))); break; // A<*B
    case 229: f = (a < U32(m(c))); break; // A<*C
    case 230: f = (a < h(d)); break; // A<*D
    case 231: f = (a < U32(header[pc++])); break; // A< N
    case 232: f = 0; break; // A>A
    case 233: f = (a > b); break; // A>B
    case 234: f = (a > c); break; // A>C
    case 235: f = (a > d); break; // A>D
    case 236: f = (a > U32(m(b))); break; // A>*B
    case 237: f = (a > U32(m(c))); break; // A>*C
    case 238: f = (a > h(d)); break; // A>*D
    case 239: f = (a > U32(header[pc++])); break; // A> N
    // Long jump: absolute target relative to hbegin, 16-bit operand
    case 255: if((pc=hbegin+header[pc]+256*header[pc+1])>=hend)err();break;//LJ
    // Any opcode without a case above is illegal
    default: err();
  }
  return 1;
}
// Report a failed ZPAQL instruction by calling error() with a fixed
// message. execute() calls this for opcode 0 (ERROR), for opcodes with no
// case, and for an out-of-range long jump. Whether control returns to the
// caller depends on error(), which is defined elsewhere.
void ZPAQL::err() {
  error("ZPAQL execution error");
}
///////////////////////// Predictor /////////////////////////
// sdt2k[i]=2048/i (integer division) for i>0; sdt2k[0]=0
static const int sdt2k[256]={
     0,  2048,  1024,   682,   512,   409,   341,   292,
   256,   227,   204,   186,   170,   157,   146,   136,
   128,   120,   113,   107,   102,    97,    93,    89,
    85,    81,    78,    75,    73,    70,    68,    66,
    64,    62,    60,    58,    56,    55,    53,    52,
    51,    49,    48,    47,    46,    45,    44,    43,
    42,    41,    40,    40,    39,    38,    37,    37,
    36,    35,    35,    34,    34,    33,    33,    32,
    32,    31,    31,    30,    30,    29,    29,    28,
    28,    28,    27,    27,    26,    26,    26,    25,
    25,    25,    24,    24,    24,    24,    23,    23,
    23,    23,    22,    22,    22,    22,    21,    21,
    21,    21,    20,    20,    20,    20,    20,    19,
    19,    19,    19,    19,    18,    18,    18,    18,
    18,    18,    17,    17,    17,    17,    17,    17,
    17,    16,    16,    16,    16,    16,    16,    16,
    16,    15,    15,    15,    15,    15,    15,    15,
    15,    14,    14,    14,    14,    14,    14,    14,
    14,    14,    14,    13,    13,    13,    13,    13,
    13,    13,    13,    13,    13,    13,    12,    12,
    12,    12,    12,    12,    12,    12,    12,    12,
    12,    12,    12,    11,    11,    11,    11,    11,
    11,    11,    11,    11,    11,    11,    11,    11,
    11,    11,    11,    10,    10,    10,    10,    10,
    10,    10,    10,    10,    10,    10,    10,    10,
    10,    10,    10,    10,    10,     9,     9,     9,
     9,     9,     9,     9,     9,     9,     9,     9,
     9,     9,     9,     9,     9,     9,     9,     9,
     9,     9,     9,     9,     8,     8,     8,     8,
     8,     8,     8,     8,     8,     8,     8,     8,
     8,     8,     8,     8,     8,     8,     8,     8,
     8,     8,     8,     8,     8,     8,     8,     8
};
// sdt[i]=(1<<17)/(i*2+3)*2;
static const int sdt[1024]={
 87380, 52428, 37448, 29126, 23830, 20164, 17476, 15420,
 13796, 12482, 11396, 10484,  9708,  9038,  8456,  7942,
  7488,  7084,  6720,  6392,  6096,  5824,  5576,  5348,
  5140,  4946,  4766,  4598,  4442,  4296,  4160,  4032,
  3912,  3798,  3692,  3590,  3494,  3404,  3318,  3236,
  3158,  3084,  3012,  2944,  2880,  2818,  2758,  2702,
  2646,  2594,  2544,  2496,  2448,  2404,  2360,  2318,
  2278,  2240,  2202,  2166,  2130,  2096,  2064,  2032,
  2000,  1970,  1940,  1912,  1884,  1858,  1832,  1806,
  1782,  1758,  1736,  1712,  1690,  1668,  1648,  1628,
  1608,  1588,  1568,  1550,  1532,  1514,  1496,  1480,
  1464,  1448,  1432,  1416,  1400,  1386,  1372,  1358,
  1344,  1330,  1316,  1304,  1290,  1278,  1266,  1254,
  1242,  1230,  1218,  1208,  1196,  1186,  1174,  1164,
  1154,  1144,  1134,  1124,  1114,  1106,  1096,  1086,
  1078,  1068,  1060,  1052,  1044,  1036,  1028,  1020,
  1012,  1004,   996,   988,   980,   974,   966,   960,
   952,   946,   938,   932,   926,   918,   912,   906,
   900,   894,   888,   882,   876,   870,   864,   858,
   852,   848,   842,   836,   832,   826,   820,   816,
   810,   806,   800,   796,   790,   786,   782,   776,
   772,   768,   764,   758,   754,   750,   746,   742,
   738,   734,   730,   726,   722,   718,   714,   710,
   706,   702,   698,   694,   690,   688,   684,   680,
   676,   672,   670,   666,   662,   660,   656,   652,
   650,   646,   644,   640,   636,   634,   630,   628,
   624,   622,   618,   616,   612,   610,   608,   604,
   602,   598,   596,   594,   590,   588,   586,   582,
   580,   578,   576,   572,   570,   568,   566,   562,
   560,   558,   556,   554,   550,   548,   546,   544,
   542,   540,   538,   536,   532,   530,   528,   526,
   524,   522,   520,   518,   516,   514,   512,   510,
   508,   506,   504,   502,   500,   498,   496,   494,
   492,   490,   488,   488,   486,   484,   482,   480,
   478,   476,   474,   474,   472,   470,   468,   466,
   464,   462,   462,   460,   458,   456,   454,   454,
   452,   450,   448,   448,   446,   444,   442,   442,
   440,   438,   436,   436,   434,   432,   430,   430,
   428,   426,   426,   424,   422,   422,   420,   418,
   418,   416,   414,   414,   412,   410,   410,   408,
   406,   406,   404,   402,   402,   400,   400,   398,
   396,   396,   394,   394,   392,   390,   390,   388,
   388,   386,   386,   384,   382,   382,   380,   380,
   378,   378,   376,   376,   374,   372,   372,   370,
   370,   368,   368,   366,   366,   364,   364,   362,
   362,   360,   360,   358,   358,   356,   356,   354,
   354,   352,   352,   350,   350,   348,   348,   348,
   346,   346,   344,   344,   342,   342,   340,   340,
   340,   338,   338,   336,   336,   334,   334,   332,
   332,   332,   330,   330,   328,   328,   328,   326,
   326,   324,   324,   324,   322,   322,   320,   320,
   320,   318,   318,   316,   316,   316,   314,   314,
   312,   312,   312,   310,   310,   310,   308,   308,
   308,   306,   306,   304,   304,   304,   302,   302,
   302,   300,   300,   300,   298,   298,   298,   296,
   296,   296,   294,   294,   294,   292,   292,   292,
   290,   290,   290,   288,   288,   288,   286,   286,
   286,   284,   284,   284,   284,   282,   282,   282,
   280,   280,   280,   278,   278,   278,   276,   276,
   276,   276,   274,   274,   274,   272,   272,   272,
   272,   270,   270,   270,   268,   268,   268,   268,
   266,   266,   266,   266,   264,   264,   264,   262,
   262,   262,   262,   260,   260,   260,   260,   258,
   258,   258,   258,   256,   256,   256,   256,   254,
   254,   254,   254,   252,   252,   252,   252,   250,
   250,   250,   250,   248,   248,   248,   248,   248,
   246,   246,   246,   246,   244,   244,   244,   244,
   242,   242,   242,   242,   242,   240,   240,   240,
   240,   238,   238,   238,   238,   238,   236,   236,
   236,   236,   234,   234,   234,   234,   234,   232,
   232,   232,   232,   232,   230,   230,   230,   230,
   230,   228,   228,   228,   228,   228,   226,   226,
   226,   226,   226,   224,   224,   224,   224,   224,
   222,   222,   222,   222,   222,   220,   220,   220,
   220,   220,   220,   218,   218,   218,   218,   218,
   216,   216,   216,   216,   216,   216,   214,   214,
   214,   214,   214,   212,   212,   212,   212,   212,
   212,   210,   210,   210,   210,   210,   210,   208,
   208,   208,   208,   208,   208,   206,   206,   206,
   206,   206,   206,   204,   204,   204,   204,   204,
   204,   204,   202,   202,   202,   202,   202,   202,
   200,   200,   200,   200,   200,   200,   198,   198,
   198,   198,   198,   198,   198,   196,   196,   196,
   196,   196,   196,   196,   194,   194,   194,   194,
   194,   194,   194,   192,   192,   192,   192,   192,
   192,   192,   190,   190,   190,   190,   190,   190,
   190,   188,   188,   188,   188,   188,   188,   188,
   186,   186,   186,   186,   186,   186,   186,   186,
   184,   184,   184,   184,   184,   184,   184,   182,
   182,   182,   182,   182,   182,   182,   182,   180,
   180,   180,   180,   180,   180,   180,   180,   178,
   178,   178,   178,   178,   178,   178,   178,   176,
   176,   176,   176,   176,   176,   176,   176,   176,
   174,   174,   174,   174,   174,   174,   174,   174,
   172,   172,   172,   172,   172,   172,   172,   172,
   172,   170,   170,   170,   170,   170,   170,   170,
   170,   170,   168,   168,   168,   168,   168,   168,
   168,   168,   168,   166,   166,   166,   166,   166,
   166,   166,   166,   166,   166,   164,   164,   164,
   164,   164,   164,   164,   164,   164,   162,   162,
   162,   162,   162,   162,   162,   162,   162,   162,
   160,   160,   160,   160,   160,   160,   160,   160,
   160,   160,   158,   158,   158,   158,   158,   158,
   158,   158,   158,   158,   158,   156,   156,   156,
   156,   156,   156,   156,   156,   156,   156,   154,
   154,   154,   154,   154,   154,   154,   154,   154,
   154,   154,   152,   152,   152,   152,   152,   152,
   152,   152,   152,   152,   152,   150,   150,   150,
   150,   150,   150,   150,   150,   150,   150,   150,
   150,   148,   148,   148,   148,   148,   148,   148,
   148,   148,   148,   148,   148,   146,   146,   146,
   146,   146,   146,   146,   146,   146,   146,   146,
   146,   144,   144,   144,   144,   144,   144,   144,
   144,   144,   144,   144,   144,   142,   142,   142,
   142,   142,   142,   142,   142,   142,   142,   142,
   142,   142,   140,   140,   140,   140,   140,   140,
   140,   140,   140,   140,   140,   140,   140,   138,
   138,   138,   138,   138,   138,   138,   138,   138,
   138,   138,   138,   138,   138,   136,   136,   136,
   136,   136,   136,   136,   136,   136,   136,   136,
   136,   136,   136,   134,   134,   134,   134,   134,
   134,   134,   134,   134,   134,   134,   134,   134,
   134,   132,   132,   132,   132,   132,   132,   132,
   132,   132,   132,   132,   132,   132,   132,   132,
   130,   130,   130,   130,   130,   130,   130,   130,
   130,   130,   130,   130,   130,   130,   130,   128,
   128,   128,   128,   128,   128,   128,   128,   128,
   128,   128,   128,   128,   128,   128,   128,   126
};
// ssquasht[i]=int(32768.0/(1+exp((i-2048)*(-1.0/64))));
// Middle 1344 of 4096 entries only.
static const U16 ssquasht[1344]={
     0,     0,     0,     0,     0,     0,     0,     1,
     1,     1,     1,     1,     1,     1,     1,     1,
     1,     1,     1,     1,     1,     1,     1,     1,
     1,     1,     1,     1,     1,     1,     1,     1,
     1,     1,     1,     1,     1,     1,     1,     1,
     1,     1,     1,     1,     1,     1,     1,     1,
     1,     1,     1,     2,     2,     2,     2,     2,
     2,     2,     2,     2,     2,     2,     2,     2,
     2,     2,     2,     2,     2,     2,     2,     2,
     2,     2,     2,     2,     2,     3,     3,     3,
     3,     3,     3,     3,     3,     3,     3,     3,
     3,     3,     3,     3,     3,     3,     3,     3,
     4,     4,     4,     4,     4,     4,     4,     4,
     4,     4,     4,     4,     4,     4,     5,     5,
     5,     5,     5,     5,     5,     5,     5,     5,
     5,     5,     6,     6,     6,     6,     6,     6,
     6,     6,     6,     6,     7,     7,     7,     7,
     7,     7,     7,     7,     8,     8,     8,     8,
     8,     8,     8,     8,     9,     9,     9,     9,
     9,     9,    10,    10,    10,    10,    10,    10,
    10,    11,    11,    11,    11,    11,    12,    12,
    12,    12,    12,    13,    13,    13,    13,    13,
    14,    14,    14,    14,    15,    15,    15,    15,
    15,    16,    16,    16,    17,    17,    17,    17,
    18,    18,    18,    18,    19,    19,    19,    20,
    20,    20,    21,    21,    21,    22,    22,    22,
    23,    23,    23,    24,    24,    25,    25,    25,
    26,    26,    27,    27,    28,    28,    28,    29,
    29,    30,    30,    31,    31,    32,    32,    33,
    33,    34,    34,    35,    36,    36,    37,    37,
    38,    38,    39,    40,    40,    41,    42,    42,
    43,    44,    44,    45,    46,    46,    47,    48,
    49,    49,    50,    51,    52,    53,    54,    54,
    55,    56,    57,    58,    59,    60,    61,    62,
    63,    64,    65,    66,    67,    68,    69,    70,
    71,    72,    73,    74,    76,    77,    78,    79,
    81,    82,    83,    84,    86,    87,    88,    90,
    91,    93,    94,    96,    97,    99,   100,   102,
   103,   105,   107,   108,   110,   112,   114,   115,
   117,   119,   121,   123,   125,   127,   129,   131,
   133,   135,   137,   139,   141,   144,   146,   148,
   151,   153,   155,   158,   160,   163,   165,   168,
   171,   173,   176,   179,   182,   184,   187,   190,
   193,   196,   199,   202,   206,   209,   212,   215,
   219,   222,   226,   229,   233,   237,   240,   244,
   248,   252,   256,   260,   264,   268,   272,   276,
   281,   285,   289,   294,   299,   303,   308,   313,
   318,   323,   328,   333,   338,   343,   349,   354,
   360,   365,   371,   377,   382,   388,   394,   401,
   407,   413,   420,   426,   433,   440,   446,   453,
   460,   467,   475,   482,   490,   497,   505,   513,
   521,   529,   537,   545,   554,   562,   571,   580,
   589,   598,   607,   617,   626,   636,   646,   656,
   666,   676,   686,   697,   708,   719,   730,   741,
   752,   764,   776,   788,   800,   812,   825,   837,
   850,   863,   876,   890,   903,   917,   931,   946,
   960,   975,   990,  1005,  1020,  1036,  1051,  1067,
  1084,  1100,  1117,  1134,  1151,  1169,  1186,  1204,
  1223,  1241,  1260,  1279,  1298,  1318,  1338,  1358,
  1379,  1399,  1421,  1442,  1464,  1486,  1508,  1531,
  1554,  1577,  1600,  1624,  1649,  1673,  1698,  1724,
  1749,  1775,  1802,  1829,  1856,  1883,  1911,  1940,
  1968,  1998,  2027,  2057,  2087,  2118,  2149,  2181,
  2213,  2245,  2278,  2312,  2345,  2380,  2414,  2450,
  2485,  2521,  2558,  2595,  2633,  2671,  2709,  2748,
  2788,  2828,  2869,  2910,  2952,  2994,  3037,  3080,
  3124,  3168,  3213,  3259,  3305,  3352,  3399,  3447,
  3496,  3545,  3594,  3645,  3696,  3747,  3799,  3852,
  3906,  3960,  4014,  4070,  4126,  4182,  4240,  4298,
  4356,  4416,  4476,  4537,  4598,  4660,  4723,  4786,
  4851,  4916,  4981,  5048,  5115,  5183,  5251,  5320,
  5390,  5461,  5533,  5605,  5678,  5752,  5826,  5901,
  5977,  6054,  6131,  6210,  6289,  6369,  6449,  6530,
  6613,  6695,  6779,  6863,  6949,  7035,  7121,  7209,
  7297,  7386,  7476,  7566,  7658,  7750,  7842,  7936,
  8030,  8126,  8221,  8318,  8415,  8513,  8612,  8712,
  8812,  8913,  9015,  9117,  9221,  9324,  9429,  9534,
  9640,  9747,  9854,  9962, 10071, 10180, 10290, 10401,
 10512, 10624, 10737, 10850, 10963, 11078, 11192, 11308,
 11424, 11540, 11658, 11775, 11893, 12012, 12131, 12251,
 12371, 12491, 12612, 12734, 12856, 12978, 13101, 13224,
 13347, 13471, 13595, 13719, 13844, 13969, 14095, 14220,
 14346, 14472, 14599, 14725, 14852, 14979, 15106, 15233,
 15361, 15488, 15616, 15744, 15872, 16000, 16128, 16256,
 16384, 16511, 16639, 16767, 16895, 17023, 17151, 17279,
 17406, 17534, 17661, 17788, 17915, 18042, 18168, 18295,
 18421, 18547, 18672, 18798, 18923, 19048, 19172, 19296,
 19420, 19543, 19666, 19789, 19911, 20033, 20155, 20276,
 20396, 20516, 20636, 20755, 20874, 20992, 21109, 21227,
 21343, 21459, 21575, 21689, 21804, 21917, 22030, 22143,
 22255, 22366, 22477, 22587, 22696, 22805, 22913, 23020,
 23127, 23233, 23338, 23443, 23546, 23650, 23752, 23854,
 23955, 24055, 24155, 24254, 24352, 24449, 24546, 24641,
 24737, 24831, 24925, 25017, 25109, 25201, 25291, 25381,
 25470, 25558, 25646, 25732, 25818, 25904, 25988, 26072,
 26154, 26237, 26318, 26398, 26478, 26557, 26636, 26713,
 26790, 26866, 26941, 27015, 27089, 27162, 27234, 27306,
 27377, 27447, 27516, 27584, 27652, 27719, 27786, 27851,
 27916, 27981, 28044, 28107, 28169, 28230, 28291, 28351,
 28411, 28469, 28527, 28585, 28641, 28697, 28753, 28807,
 28861, 28915, 28968, 29020, 29071, 29122, 29173, 29222,
 29271, 29320, 29368, 29415, 29462, 29508, 29554, 29599,
 29643, 29687, 29730, 29773, 29815, 29857, 29898, 29939,
 29979, 30019, 30058, 30096, 30134, 30172, 30209, 30246,
 30282, 30317, 30353, 30387, 30422, 30455, 30489, 30522,
 30554, 30586, 30618, 30649, 30680, 30710, 30740, 30769,
 30799, 30827, 30856, 30884, 30911, 30938, 30965, 30992,
 31018, 31043, 31069, 31094, 31118, 31143, 31167, 31190,
 31213, 31236, 31259, 31281, 31303, 31325, 31346, 31368,
 31388, 31409, 31429, 31449, 31469, 31488, 31507, 31526,
 31544, 31563, 31581, 31598, 31616, 31633, 31650, 31667,
 31683, 31700, 31716, 31731, 31747, 31762, 31777, 31792,
 31807, 31821, 31836, 31850, 31864, 31877, 31891, 31904,
 31917, 31930, 31942, 31955, 31967, 31979, 31991, 32003,
 32015, 32026, 32037, 32048, 32059, 32070, 32081, 32091,
 32101, 32111, 32121, 32131, 32141, 32150, 32160, 32169,
 32178, 32187, 32196, 32205, 32213, 32222, 32230, 32238,
 32246, 32254, 32262, 32270, 32277, 32285, 32292, 32300,
 32307, 32314, 32321, 32327, 32334, 32341, 32347, 32354,
 32360, 32366, 32373, 32379, 32385, 32390, 32396, 32402,
 32407, 32413, 32418, 32424, 32429, 32434, 32439, 32444,
 32449, 32454, 32459, 32464, 32468, 32473, 32478, 32482,
 32486, 32491, 32495, 32499, 32503, 32507, 32511, 32515,
 32519, 32523, 32527, 32530, 32534, 32538, 32541, 32545,
 32548, 32552, 32555, 32558, 32561, 32565, 32568, 32571,
 32574, 32577, 32580, 32583, 32585, 32588, 32591, 32594,
 32596, 32599, 32602, 32604, 32607, 32609, 32612, 32614,
 32616, 32619, 32621, 32623, 32626, 32628, 32630, 32632,
 32634, 32636, 32638, 32640, 32642, 32644, 32646, 32648,
 32650, 32652, 32653, 32655, 32657, 32659, 32660, 32662,
 32664, 32665, 32667, 32668, 32670, 32671, 32673, 32674,
 32676, 32677, 32679, 32680, 32681, 32683, 32684, 32685,
 32686, 32688, 32689, 32690, 32691, 32693, 32694, 32695,
 32696, 32697, 32698, 32699, 32700, 32701, 32702, 32703,
 32704, 32705, 32706, 32707, 32708, 32709, 32710, 32711,
 32712, 32713, 32713, 32714, 32715, 32716, 32717, 32718,
 32718, 32719, 32720, 32721, 32721, 32722, 32723, 32723,
 32724, 32725, 32725, 32726, 32727, 32727, 32728, 32729,
 32729, 32730, 32730, 32731, 32731, 32732, 32733, 32733,
 32734, 32734, 32735, 32735, 32736, 32736, 32737, 32737,
 32738, 32738, 32739, 32739, 32739, 32740, 32740, 32741,
 32741, 32742, 32742, 32742, 32743, 32743, 32744, 32744,
 32744, 32745, 32745, 32745, 32746, 32746, 32746, 32747,
 32747, 32747, 32748, 32748, 32748, 32749, 32749, 32749,
 32749, 32750, 32750, 32750, 32750, 32751, 32751, 32751,
 32752, 32752, 32752, 32752, 32752, 32753, 32753, 32753,
 32753, 32754, 32754, 32754, 32754, 32754, 32755, 32755,
 32755, 32755, 32755, 32756, 32756, 32756, 32756, 32756,
 32757, 32757, 32757, 32757, 32757, 32757, 32757, 32758,
 32758, 32758, 32758, 32758, 32758, 32759, 32759, 32759,
 32759, 32759, 32759, 32759, 32759, 32760, 32760, 32760,
 32760, 32760, 32760, 32760, 32760, 32761, 32761, 32761,
 32761, 32761, 32761, 32761, 32761, 32761, 32761, 32762,
 32762, 32762, 32762, 32762, 32762, 32762, 32762, 32762,
 32762, 32762, 32762, 32763, 32763, 32763, 32763, 32763,
 32763, 32763, 32763, 32763, 32763, 32763, 32763, 32763,
 32763, 32764, 32764, 32764, 32764, 32764, 32764, 32764,
 32764, 32764, 32764, 32764, 32764, 32764, 32764, 32764,
 32764, 32764, 32764, 32764, 32765, 32765, 32765, 32765,
 32765, 32765, 32765, 32765, 32765, 32765, 32765, 32765,
 32765, 32765, 32765, 32765, 32765, 32765, 32765, 32765,
 32765, 32765, 32765, 32765, 32765, 32765, 32766, 32766,
 32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766,
 32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766,
 32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766,
 32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766,
 32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766,
 32766, 32766, 32767, 32767, 32767, 32767, 32767, 32767
};
// stdt[i]=count of -i or i in bottom or top of stretcht[]
static const U8 stdt[712]={
    64,   128,   128,   128,   128,   128,   127,   128,
   127,   128,   127,   127,   127,   127,   126,   126,
   126,   126,   126,   125,   125,   124,   125,   124,
   123,   123,   123,   123,   122,   122,   121,   121,
   120,   120,   119,   119,   118,   118,   118,   116,
   117,   115,   116,   114,   114,   113,   113,   112,
   112,   111,   110,   110,   109,   108,   108,   107,
   106,   106,   105,   104,   104,   102,   103,   101,
   101,   100,    99,    98,    98,    97,    96,    96,
    94,    94,    94,    92,    92,    91,    90,    89,
    89,    88,    87,    86,    86,    84,    84,    84,
    82,    82,    81,    80,    79,    79,    78,    77,
    76,    76,    75,    74,    73,    73,    72,    71,
    70,    70,    69,    68,    67,    67,    66,    65,
    65,    64,    63,    62,    62,    61,    61,    59,
    59,    59,    57,    58,    56,    56,    55,    54,
    54,    53,    52,    52,    51,    51,    50,    49,
    49,    48,    48,    47,    47,    45,    46,    44,
    45,    43,    43,    43,    42,    41,    41,    40,
    40,    40,    39,    38,    38,    37,    37,    36,
    36,    36,    35,    34,    34,    34,    33,    32,
    33,    32,    31,    31,    30,    31,    29,    30,
    28,    29,    28,    28,    27,    27,    27,    26,
    26,    25,    26,    24,    25,    24,    24,    23,
    23,    23,    23,    22,    22,    21,    22,    21,
    20,    21,    20,    19,    20,    19,    19,    19,
    18,    18,    18,    18,    17,    17,    17,    17,
    16,    16,    16,    16,    15,    15,    15,    15,
    15,    14,    14,    14,    14,    13,    14,    13,
    13,    13,    12,    13,    12,    12,    12,    11,
    12,    11,    11,    11,    11,    11,    10,    11,
    10,    10,    10,    10,     9,    10,     9,     9,
     9,     9,     9,     8,     9,     8,     9,     8,
     8,     8,     7,     8,     8,     7,     7,     8,
     7,     7,     7,     6,     7,     7,     6,     6,
     7,     6,     6,     6,     6,     6,     6,     5,
     6,     5,     6,     5,     5,     5,     5,     5,
     5,     5,     5,     5,     4,     5,     4,     5,
     4,     4,     5,     4,     4,     4,     4,     4,
     4,     3,     4,     4,     3,     4,     4,     3,
     3,     4,     3,     3,     3,     4,     3,     3,
     3,     3,     3,     3,     2,     3,     3,     3,
     2,     3,     2,     3,     3,     2,     2,     3,
     2,     2,     3,     2,     2,     2,     2,     3,
     2,     2,     2,     2,     2,     2,     1,     2,
     2,     2,     2,     1,     2,     2,     2,     1,
     2,     1,     2,     2,     1,     2,     1,     2,
     1,     1,     2,     1,     1,     2,     1,     1,
     2,     1,     1,     1,     1,     2,     1,     1,
     1,     1,     1,     1,     1,     1,     1,     1,
     1,     1,     1,     1,     1,     1,     1,     1,
     1,     1,     0,     1,     1,     1,     1,     0,
     1,     1,     1,     0,     1,     1,     1,     0,
     1,     1,     0,     1,     1,     0,     1,     0,
     1,     1,     0,     1,     0,     1,     0,     1,
     0,     1,     0,     1,     0,     1,     0,     1,
     0,     1,     0,     1,     0,     1,     0,     0,
     1,     0,     1,     0,     0,     1,     0,     1,
     0,     0,     1,     0,     0,     1,     0,     0,
     1,     0,     0,     1,     0,     0,     0,     1,
     0,     0,     1,     0,     0,     0,     1,     0,
     0,     0,     1,     0,     0,     0,     1,     0,
     0,     0,     0,     1,     0,     0,     0,     0,
     1,     0,     0,     0,     0,     1,     0,     0,
     0,     0,     0,     1,     0,     0,     0,     0,
     0,     1,     0,     0,     0,     0,     0,     0,
     1,     0,     0,     0,     0,     0,     0,     0,
     1,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     1,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     1,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     1,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     1,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     1,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     1,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     1,     0
};
// Construct a predictor bound to the ZPAQL machine zr. The partial byte
// c8 and the nibble context hmap4 start at 1; no lookup tables and no
// code buffer exist until init() is called.
Predictor::Predictor(ZPAQL& zr):
    c8(1), hmap4(1), z(zr) {
  // The model arithmetic relies on these exact type widths.
  assert(sizeof(U8)==1 && sizeof(U16)==2);
  assert(sizeof(U32)==4 && sizeof(U64)==8);
  assert(sizeof(short)==2 && sizeof(int)==4);
  initTables=false;  // model independent tables not built yet
  pcode_size=0;      // nothing allocated for pcode
  pcode=0;
}
// Release the buffer tracked by pcode/pcode_size by requesting a new
// size of 0 from allocx().
Predictor::~Predictor() {
  allocx(pcode, pcode_size, 0);  // free executable memory
}
// Initialize the predictor with a new model in z.
// Steps, in order:
//   1. release any previously generated code (pcode),
//   2. reset the context hash state with z.inith(),
//   3. build the model independent tables dt2k, dt, squasht and stretcht
//      (once per Predictor, and only when isModeled() is true),
//   4. zero the predictions p[] and context hashes h[] and reset all 256
//      component slots,
//   5. walk the n component descriptors that start at z.header[7] and
//      size/initialize the arrays of each component.
// Calls error() when a descriptor has an out of range parameter or an
// unknown component type.
void Predictor::init() {
  // Clear old JIT code if any
  allocx(pcode, pcode_size, 0);
  // Initialize context hash function
  z.inith();
  // Initialize model independent tables
  if (!initTables && isModeled()) {
    initTables=true;
    memcpy(dt2k, sdt2k, sizeof(dt2k));
    memcpy(dt, sdt, sizeof(dt));
    // ssquasht[i]=int(32768.0/(1+exp((i-2048)*(-1.0/64))));
    // Copy middle 1344 of 4096 entries.
    // Entries 0..1375 are 0, 1376..2719 come from ssquasht and
    // 2720..4095 are 32767 (sizes are in bytes, 2 per entry).
    memset(squasht, 0, 1376*2);
    memcpy(squasht+1376, ssquasht, 1344*2);
    for (int i=2720; i<4096; ++i) squasht[i]=32767;
    // sstretcht[i]=int(log((i+0.5)/(32767.5-i))*64+0.5+100000)-100000;
    // Upper half of stretcht: value i is repeated stdt[i] times, starting
    // at index 16384. The run lengths must add up to exactly 16384.
    int k=16384;
    for (unsigned int i=0; i<712; ++i)
      for (int j=stdt[i]; j>0; --j)
        stretcht[k++]=i;
    assert(k==32768);
    // Lower half is the negated mirror image of the upper half.
    for (unsigned int i=0; i<16384; ++i)
      stretcht[i]=-stretcht[32767-i];
#ifndef NDEBUG
    // Verify floating point math for squash() and stretch()
    // (rolling checksums over every table entry against known values)
    U32 sqsum=0, stsum=0;
    for (int i=32767; i>=0; --i)
      stsum=stsum*3+stretch(i);
    for (int i=4095; i>=0; --i)
      sqsum=sqsum*3+squash(i-2048);
    assert(stsum==3887533746u);
    assert(sqsum==2278286169u);
#endif
  }
  // Initialize predictions
  for (unsigned int i=0; i<256; ++i) h[i]=p[i]=0;
  // Initialize components
  for (unsigned int i=0; i<256; ++i)  // clear old model
    comp[i].init();
  int n=z.header[6]; // hsize[0..1] hh hm ph pm n (comp)[n] END 0[128] (hcomp) END
  const U8* cp=&z.header[7];  // start of component list
  // cp[0] is the type of component i; cp[1..] are its parameters as named
  // in the comment of each case. Components may only reference earlier
  // components, hence the "< i" checks on input indexes.
  for (int i=0; i<n; ++i) {
    assert(cp<&z.header[z.cend]);
    assert(cp>&z.header[0] && cp<&z.header[z.header.isize()-8]);
    Component& cr=comp[i];
    switch(cp[0]) {
      case CONS:  // c
        // constant prediction, fixed here and never updated
        p[i]=(cp[1]-128)*4;
        break;
      case CM: // sizebits limit
        if (cp[1]>32) error("max size for CM is 32");
        cr.cm.resize(1, cp[1]);  // packed CM (22 bits) + CMCOUNT (10 bits)
        cr.limit=cp[2]*4;
        for (size_t j=0; j<cr.cm.size(); ++j)
          cr.cm[j]=0x80000000;
        break;
      case ICM: // sizebits
        if (cp[1]>26) error("max size for ICM is 26");
        cr.limit=1023;
        cr.cm.resize(256);
        cr.ht.resize(64, cp[1]);
        for (size_t j=0; j<cr.cm.size(); ++j)
          cr.cm[j]=st.cminit(j);
        break;
      case MATCH:  // sizebits
        if (cp[1]>32 || cp[2]>32) error("max size for MATCH is 32 32");
        cr.cm.resize(1, cp[1]);  // index
        cr.ht.resize(1, cp[2]);  // buf
        cr.ht(0)=1;
        break;
      case AVG: // j k wt
        // no state of its own: only the input indexes are validated
        if (cp[1]>=i) error("AVG j >= i");
        if (cp[2]>=i) error("AVG k >= i");
        break;
      case MIX2:  // sizebits j k rate mask
        if (cp[1]>32) error("max size for MIX2 is 32");
        if (cp[3]>=i) error("MIX2 k >= i");
        if (cp[2]>=i) error("MIX2 j >= i");
        cr.c=(size_t(1)<<cp[1]); // size (number of contexts)
        cr.a16.resize(1, cp[1]);  // wt[size][m]
        for (size_t j=0; j<cr.a16.size(); ++j)
          cr.a16[j]=32768;
        break;
      case MIX: {  // sizebits j m rate mask
        if (cp[1]>32) error("max size for MIX is 32");
        if (cp[2]>=i) error("MIX j >= i");
        if (cp[3]<1 || cp[3]>i-cp[2]) error("MIX m not in 1..i-j");
        int m=cp[3];  // number of inputs
        assert(m>=1);
        cr.c=(size_t(1)<<cp[1]); // size (number of contexts)
        cr.cm.resize(m, cp[1]);  // wt[size][m]
        // every weight starts at 65536/m
        for (size_t j=0; j<cr.cm.size(); ++j)
          cr.cm[j]=65536/m;
        break;
      }
      case ISSE:  // sizebits j
        if (cp[1]>32) error("max size for ISSE is 32");
        if (cp[2]>=i) error("ISSE j >= i");
        cr.ht.resize(64, cp[1]);
        cr.cm.resize(512);
        // cm holds 256 pairs of weights, one pair per value of j below
        for (int j=0; j<256; ++j) {
          cr.cm[j*2]=1<<15;
          cr.cm[j*2+1]=clamp512k(stretch(st.cminit(j)>>8)*1024);
        }
        break;
      case SSE: // sizebits j start limit
        if (cp[1]>32) error("max size for SSE is 32");
        if (cp[2]>=i) error("SSE j >= i");
        if (cp[3]>cp[4]*4) error("SSE start > limit*4");
        cr.cm.resize(32, cp[1]);
        cr.limit=cp[4]*4;
        // each group of 32 entries is seeded from squash() at steps of 64,
        // with the start count cp[3] in the low bits
        for (size_t j=0; j<cr.cm.size(); ++j)
          cr.cm[j]=squash((j&31)*64-992)<<17|cp[3];
        break;
      default: error("unknown component type");
    }
    // step to the next descriptor; compsize[] gives the descriptor length
    assert(compsize[*cp]>0);
    cp+=compsize[*cp];
    assert(cp>=&z.header[7] && cp<&z.header[z.cend]);
  }
}
// Return next bit prediction using interpreted COMP code.
// Walks the n component descriptors that start at z.header[7], stores
// the output of component i in p[i] (asserted to stay in -2048..2047)
// and returns squash() of the last component's output. Several cases
// also record the selected context in cr.cxt (and cr.c), which update0()
// reads back when training on the actual bit.
int Predictor::predict0() {
  assert(initTables);
  assert(c8>=1 && c8<=255);
  // Predict next bit
  int n=z.header[6];
  assert(n>0 && n<=255);
  const U8* cp=&z.header[7];
  ///assert(cp[-1]==n);
  for (int i=0; i<n; ++i) {
    assert(cp>&z.header[0] && cp<&z.header[z.header.isize()-8]);
    Component& cr=comp[i];
    switch(cp[0]) {
      case CONS:  // c
        // p[i] was fixed by init()
        break;
      case CM:  // sizebits limit
        cr.cxt=h[i]^hmap4;
        p[i]=stretch(cr.cm(cr.cxt)>>17);
        break;
      case ICM: // sizebits
        assert((hmap4&15)>0);
        // a new hash table row is looked up only when c8==1 or when the
        // high nibble of c8 is 1
        if (c8==1 || (c8&0xf0)==16) cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8);
        cr.cxt=cr.ht[cr.c+(hmap4&15)];
        p[i]=stretch(cr.cm(cr.cxt)>>8);
        break;
      case MATCH: // sizebits bufbits: a=len, b=offset, c=bit, cxt=bitpos,
                  //                   ht=buf, limit=pos
        assert(cr.cm.size()==(size_t(1)<<cp[1]));
        assert(cr.ht.size()==(size_t(1)<<cp[2]));
        assert(cr.a<=255);
        assert(cr.c==0 || cr.c==1);
        assert(cr.cxt<8);
        assert(cr.limit<cr.ht.size());
        if (cr.a==0) p[i]=0;
        else {
          cr.c=(cr.ht(cr.limit-cr.b)>>(7-cr.cxt))&1; // predicted bit
          // magnitude from dt2k[len]; the sign follows the predicted bit
          p[i]=stretch(dt2k[cr.a]*(cr.c*-2+1)&32767);
        }
        break;
      case AVG: // j k wt
        // weighted average of two earlier outputs, weights wt and 256-wt
        p[i]=(p[cp[1]]*cp[3]+p[cp[2]]*(256-cp[3]))>>8;
        break;
      case MIX2: { // sizebits j k rate mask
                   // c=size cm=wt[size] cxt=input
        cr.cxt=((h[i]+(c8&cp[5]))&(cr.c-1));
        assert(cr.cxt<cr.a16.size());
        int w=cr.a16[cr.cxt];
        assert(w>=0 && w<65536);
        p[i]=(w*p[cp[2]]+(65536-w)*p[cp[3]])>>16;
        assert(p[i]>=-2048 && p[i]<2048);
      }
        break;
      case MIX: {  // sizebits j m rate mask
                   // c=size cm=wt[size][m] cxt=index of wt in cm
        int m=cp[3];
        assert(m>=1 && m<=i);
        cr.cxt=h[i]+(c8&cp[5]);
        cr.cxt=(cr.cxt&(cr.c-1))*m; // pointer to row of weights
        assert(cr.cxt<=cr.cm.size()-m);
        // the weights stored in cm are accessed as signed int here
        int* wt=(int*)&cr.cm[cr.cxt];
        p[i]=0;
        for (int j=0; j<m; ++j)
          p[i]+=(wt[j]>>8)*p[cp[2]+j];
        p[i]=clamp2k(p[i]>>8);
      }
        break;
      case ISSE: { // sizebits j -- c=hi, cxt=bh
        assert((hmap4&15)>0);
        if (c8==1 || (c8&0xf0)==16)
          cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8);
        cr.cxt=cr.ht[cr.c+(hmap4&15)];  // bit history
        // weight pair selected by the bit history: wt[0] scales the input
        // prediction, wt[1] scales the constant 64
        int *wt=(int*)&cr.cm[cr.cxt*2];
        p[i]=clamp2k((wt[0]*p[cp[2]]+wt[1]*64)>>16);
      }
        break;
      case SSE: { // sizebits j start limit
        cr.cxt=(h[i]+c8)*32;
        // clamp the shifted input to 0..1983, then split it into a bucket
        // index (pq, 0..30) and an interpolation weight (wt, 0..63)
        int pq=p[cp[2]]+992;
        if (pq<0) pq=0;
        if (pq>1983) pq=1983;
        int wt=pq&63;
        pq>>=6;
        assert(pq>=0 && pq<=30);
        cr.cxt+=pq;
        p[i]=stretch(((cr.cm(cr.cxt)>>10)*(64-wt)+(cr.cm(cr.cxt+1)>>10)*wt)>>13);
        // leave cxt on whichever of the two buckets has the larger weight
        cr.cxt+=wt>>5;
      }
        break;
      default:
        error("component predict not implemented");
    }
    cp+=compsize[cp[0]];
    assert(cp<&z.header[z.cend]);
    assert(p[i]>=-2048 && p[i]<2048);
  }
  assert(cp[0]==NONE);
  return squash(p[n-1]);
}
// Update model with decoded bit y (0...1)
// Trains each of the n components on y using the state (cr.cxt, cr.c,
// p[]) left by the preceding predict0() call, then shifts y into the
// partial byte c8. When a whole byte has been collected (c8>=256) it is
// handed to z.run(), c8 and hmap4 restart at 1 and the context hashes
// h[] are reloaded from z.H(i); otherwise only hmap4 is advanced.
void Predictor::update0(int y) {
  assert(initTables);
  assert(y==0 || y==1);
  assert(c8>=1 && c8<=255);
  assert(hmap4>=1 && hmap4<=511);
  // Update components
  const U8* cp=&z.header[7];
  int n=z.header[6];
  assert(n>=1 && n<=255);
  ///assert(cp[-1]==n);
  for (int i=0; i<n; ++i) {
    Component& cr=comp[i];
    switch(cp[0]) {
      case CONS:  // c
        break;
      case CM:  // sizebits limit
        train(cr, y);
        break;
      case ICM: { // sizebits: cxt=ht[b]=bh, ht[c][0..15]=bh row, cxt=bh
        // advance the bit history slot, then move the prediction for the
        // old history a quarter of the way toward y
        cr.ht[cr.c+(hmap4&15)]=st.next(cr.ht[cr.c+(hmap4&15)], y);
        U32& pn=cr.cm(cr.cxt);
        pn+=int(y*32767-(pn>>8))>>2;
      }
        break;
      case MATCH: // sizebits bufbits:
                  //   a=len, b=offset, c=bit, cm=index, cxt=bitpos
                  //   ht=buf, limit=pos
      {
        assert(cr.a<=255);
        assert(cr.c==0 || cr.c==1);
        assert(cr.cxt<8);
        assert(cr.cm.size()==(size_t(1)<<cp[1]));
        assert(cr.ht.size()==(size_t(1)<<cp[2]));
        assert(cr.limit<cr.ht.size());
        if (int(cr.c)!=y) cr.a=0;  // mismatch?
        // shift y into the byte being assembled at buf[pos]
        cr.ht(cr.limit)+=cr.ht(cr.limit)+y;
        if (++cr.cxt==8) {
          cr.cxt=0;
          ++cr.limit;
          // NOTE(review): 1<<cp[2] is an int shift while init() accepts
          // cp[2] up to 32; a shift count of 31 or more is not well
          // defined for int. Confirm whether such sizes can reach here.
          cr.limit&=(1<<cp[2])-1;
          if (cr.a==0) {  // look for a match
            cr.b=cr.limit-cr.cm(h[i]);
            // a candidate offset that is 0 modulo the buffer size is ignored
            if (cr.b&(cr.ht.size()-1))
              while (cr.a<255
                     && cr.ht(cr.limit-cr.a-1)==cr.ht(cr.limit-cr.a-cr.b-1))
                ++cr.a;
          }
          else cr.a+=cr.a<255;
          // remember the current position for this context hash
          cr.cm(h[i])=cr.limit;
        }
      }
        break;
      case AVG:  // j k wt
        break;
      case MIX2: { // sizebits j k rate mask
                   // cm=wt[size], cxt=input
        assert(cr.a16.size()==cr.c);
        assert(cr.cxt<cr.a16.size());
        int err=(y*32767-squash(p[i]))*cp[4]>>5;
        int w=cr.a16[cr.cxt];
        w+=(err*(p[cp[2]]-p[cp[3]])+(1<<12))>>13;
        // keep the weight within 0..65535
        if (w<0) w=0;
        if (w>65535) w=65535;
        cr.a16[cr.cxt]=w;
      }
        break;
      case MIX: {   // sizebits j m rate mask
                    // cm=wt[size][m], cxt=input
        int m=cp[3];
        assert(m>0 && m<=i);
        assert(cr.cm.size()==m*cr.c);
        assert(cr.cxt+m<=cr.cm.size());
        int err=(y*32767-squash(p[i]))*cp[4]>>4;
        int* wt=(int*)&cr.cm[cr.cxt];
        // adjust each weight of the selected row by err times its input
        for (int j=0; j<m; ++j)
          wt[j]=clamp512k(wt[j]+((err*p[cp[2]+j]+(1<<12))>>13));
      }
        break;
      case ISSE: { // sizebits j  -- c=hi, cxt=bh
        assert(cr.cxt==cr.ht[cr.c+(hmap4&15)]);
        int err=y*32767-squash(p[i]);
        int *wt=(int*)&cr.cm[cr.cxt*2];
        wt[0]=clamp512k(wt[0]+((err*p[cp[2]]+(1<<12))>>13));
        wt[1]=clamp512k(wt[1]+((err+16)>>5));
        // the bit history is advanced only after its weights were trained
        cr.ht[cr.c+(hmap4&15)]=st.next(cr.cxt, y);
      }
        break;
      case SSE:  // sizebits j start limit
        train(cr, y);
        break;
      default:
        assert(0);
    }
    cp+=compsize[cp[0]];
    assert(cp>=&z.header[7] && cp<&z.header[z.cend]
           && cp<&z.header[z.header.isize()-8]);
  }
  assert(cp[0]==NONE);
  // Save bit y in c8, hmap4
  c8+=c8+y;
  if (c8>=256) {
    // byte complete: run the context computation on it and restart
    z.run(c8-256);
    hmap4=1;
    c8=1;
    for (int i=0; i<n; ++i) h[i]=z.H(i);
  }
  else if (c8>=16 && c8<32)
    // 4 bits seen: move the first nibble up and restart the low nibble
    hmap4=(hmap4&0xf)<<5|y<<4|1;
  else
    // shift y into the low nibble, keeping the upper bits
    hmap4=(hmap4&0x1f0)|(((hmap4&0xf)*2+y)&0xf);
}
// Locate the 16 byte row for context cxt in hash table ht and return its
// index. The row is selected by the low sizebits of cxt; byte 0 of a row
// holds the next higher 8 bits of cxt as a check value. Three candidate
// rows are probed (h, h^16, h^32). If none carries the check value, the
// candidate whose byte 1 (priority) is lowest is zeroed, tagged with the
// check value and returned; ties favour the earlier candidate.
size_t Predictor::find(Array<U8>& ht, int sizebits, U32 cxt) {
  assert(initTables);
  assert(ht.size()==size_t(16)<<sizebits);
  const int check=cxt>>sizebits&255;
  const size_t row0=(cxt*16)&(ht.size()-16);
  if (ht[row0]==check) return row0;
  const size_t row1=row0^16;
  if (ht[row1]==check) return row1;
  const size_t row2=row0^32;
  if (ht[row2]==check) return row2;

  // Not found: choose the row to replace by lowest priority.
  size_t victim;
  if (ht[row0+1]<=ht[row1+1] && ht[row0+1]<=ht[row2+1])
    victim=row0;
  else if (ht[row1+1]<ht[row2+1])
    victim=row1;
  else
    victim=row2;
  memset(&ht[victim], 0, 16);
  ht[victim]=check;
  return victim;
}
/////////////////////// Decoder ///////////////////////
// Construct a decoder whose predictor pr runs on ZPAQL machine z.
// No input is attached yet (in=0), the coding range starts as
// low=1..high=0xFFFFFFFF with curr=0, the read/write positions are 0
// and buf is created with BUFSIZE elements.
Decoder::Decoder(ZPAQL& z):
    in(0), low(1), high(0xFFFFFFFF), curr(0), rpos(0), wpos(0),
    pr(z), buf(BUFSIZE) {
}
// Prepare for a new block: initialize the predictor, then reset the
// coder state. A modeled block starts with the full range and curr=0;
// otherwise low, high and curr are all cleared.
void Decoder::init() {
  pr.init();
  if (!pr.isModeled()) {
    low=high=curr=0;
    return;
  }
  low=1;
  high=0xFFFFFFFF;
  curr=0;
}
// Return next bit of decoded input, which has 16 bit probability p of being 1
int Decoder::decode(int p) {
 /// assert(pr.isModeled());
  assert(p>=0 && p<65536);
  assert(high>low && low>0);
  if (curr<low || curr>high) error("archive corrupted");
  assert(curr>=low && curr<=high);
  U32 mid=low+U32(((high-low)*U64(U32(p)))>>16);  // split range
  assert(high>mid && mid>=low);
  int y;
  if (curr<=mid) y=1, high=mid;  // pick half
  else y=0, low=mid+1;
  while ((high^low)<0x1000000) { // shift out identical leading bytes
    high=high<<8|255;
    low=low<<8;
    low+=(low==0);
    int c=get();
    if (c<0) error("unexpected end of file");
    curr=curr<<8|c;
  }
  return y;
}
// Decompress 1 byte or -1 at end of input
int Decoder::decompress() {
  if (pr.isModeled()) {  // n>0 components?
    if (curr==0) {  // segment initialization
      for (int i=0; i<4; ++i)
        curr=curr<<8|get();
    }
    if (decode(0)) {
      if (curr!=0) error("decoding end of stream");
      return -1;
    }
    else {
      int c=1;
      while (c<256) {  // get 8 bits
        int p=pr.predict()*2+1;
        c+=c+decode(p);
        pr.update(c&1);
      }
      return c-256;
    }
  }
  else {
    if (curr==0) {
      for (int i=0; i<4; ++i) curr=curr<<8|get();
      if (curr==0) return -1;
    }
    --curr;
    return get();
  }
}
// Find end of compressed data and return next byte
int Decoder::skip() {
  int c=-1;
  if (pr.isModeled()) {
    while (curr==0)  // at start?
      curr=get();
    while (curr && (c=get())>=0)  // find 4 zeros
      curr=curr<<8|c;
    while ((c=get())==0) ;  // might be more than 4
    return c;
  }
  else {
    if (curr==0)  // at start?
      for (int i=0; i<4 && (c=get())>=0; ++i) curr=curr<<8|c;
    while (curr>0) {
      while (curr>0) {
        --curr;
        if (get()<0) return error("skipped to EOF"), -1;
      }
      for (int i=0; i<4 && (c=get())>=0; ++i) curr=curr<<8|c;
    }
    if (c>=0) c=get();
    return c;
  }
}
////////////////////// PostProcessor //////////////////////
// Reset the post-processor for a new block: back to state 0 with no
// program size, remember ph and pm from the block header, and clear z.
void PostProcessor::init(int h, int m) {
  hsize=0;
  state=0;
  ph=h;
  pm=m;
  z.clear();
}
// (PASS=0 | PROG=1 psize[0..1] pcomp[0..psize-1]) data... EOB=-1
// Return state: 1=PASS, 2..4=loading PROG, 5=PROG loaded
int PostProcessor::write(int c) {
  assert(c>=-1 && c<=255);
  switch (state) {
    case 0:  // initial state
      if (c<0) error("Unexpected EOS");
      state=c+1;  // 1=PASS, 2=PROG
      if (state>2) error("unknown post processing type");
      if (state==1) z.clear();
      break;
    case 1:  // PASS
      z.outc(c);
      break;
    case 2: // PROG
      if (c<0) error("Unexpected EOS");
      hsize=c;  // low byte of size
      state=3;
      break;
    case 3:  // PROG psize[0]
      if (c<0) error("Unexpected EOS");
      hsize+=c*256;  // high byte of psize
      if (hsize<1) error("Empty PCOMP");
      z.header.resize(hsize+300);
      z.cend=8;
      z.hbegin=z.hend=z.cend+128;
      z.header[4]=ph;
      z.header[5]=pm;
      state=4;
      break;
    case 4:  // PROG psize[0..1] pcomp[0...]
      if (c<0) error("Unexpected EOS");
      assert(z.hend<z.header.isize());
      z.header[z.hend++]=c;  // one byte of pcomp
      if (z.hend-z.hbegin==hsize) {  // last byte of pcomp?
        hsize=(z.cend-2)+z.hend-z.hbegin;
        z.header[0]=hsize&255;  // header size with empty COMP
        z.header[1]=hsize>>8;
        z.initp();
        state=5;
      }
      break;
    case 5:  // PROG ... data
      z.run(c);
      if (c<0) z.flush();
      break;
  }
  return state;
}
/////////////////////// Decompresser /////////////////////
// Find the start of a block and return true if found. Set memptr
// to memory used.
bool Decompresser::findBlock(double* memptr) {
  assert(state==BLOCK);
  // Find start of block
  U32 h1=0x3D49B113, h2=0x29EB7F93, h3=0x2614BE13, h4=0x3828EB13;
  // Rolling hashes initialized to hash of first 13 bytes
  int c;
  while ((c=dec.get())!=-1) {
    h1=h1*12+c;
    h2=h2*20+c;
    h3=h3*28+c;
    h4=h4*44+c;
    if (h1==0xB16B88F1 && h2==0xFF5376F1 && h3==0x72AC5BF1 && h4==0x2F909AF1)
      break;  // hash of 16 byte string
  }
  if (c==-1) return false;
  // Read header
  if ((c=dec.get())!=1 && c!=2) error("unsupported ZPAQ level");
  if (dec.get()!=1) error("unsupported ZPAQL type");
  z.read(&dec);
  if (c==1 && z.header.isize()>6 && z.header[6]==0)
    error("ZPAQ level 1 requires at least 1 component");
  if (memptr) *memptr=z.memory();
  state=FILENAME;
  decode_state=FIRSTSEG;
  return true;
}
// Read the marker that starts a segment (1) or ends the block (255).
// For a segment, the 0 terminated filename is copied to filename (if
// not null) and true is returned. At end of block the state returns to
// BLOCK and false is returned. Any other marker is an error.
bool Decompresser::findFilename(Writer* filename) {
  assert(state==FILENAME);
  const int marker=dec.get();
  if (marker==255) {  // end of block found
    state=BLOCK;
    return false;
  }
  if (marker!=1) {
    error("missing segment or end of block");
    return false;
  }
  // segment found: copy the name up to its terminating 0
  for (;;) {
    const int ch=dec.get();
    if (ch==-1) error("unexpected EOF");
    if (ch==0) break;
    if (filename) filename->put(ch);
  }
  state=COMMENT;
  return true;
}
// Read the 0 terminated comment of the segment header, copying it to
// comment if not null, then check that the reserved byte after it is 0.
// The state becomes DATA before anything is read.
void Decompresser::readComment(Writer* comment) {
  assert(state==COMMENT);
  state=DATA;
  for (int ch=dec.get(); ch!=0; ch=dec.get()) {
    if (ch==-1) error("unexpected EOF");
    if (comment) comment->put(ch);
  }
  if (dec.get()!=0) error("missing reserved byte");
}
// Decompress up to n bytes of the current segment, or the whole segment
// if n < 0. Returns false once the end of the segment has been reached
// (state becomes SEGEND), true if more data may follow.
bool Decompresser::decompress(int n) {
  assert(state==DATA);
  if (decode_state==SKIP) error("decompression after skipped segment");
  assert(decode_state!=SKIP);

  // The first segment of a block sets up the decoder and post-processor.
  if (decode_state==FIRSTSEG) {
    dec.init();
    assert(z.header.size()>5);
    pp.init(z.header[4], z.header[5]);
    decode_state=SEG;
  }

  // Feed bytes to the post-processor until its state is 1 or 5, i.e.
  // until the PASS/PROG prefix has been consumed.
  while ((pp.getState()&3)!=1)
    pp.write(dec.decompress());

  // A negative count never reaches 0, so it runs to end of segment.
  int remaining=n;
  while (remaining!=0) {
    const int c=dec.decompress();
    pp.write(c);
    if (c==-1) {
      state=SEGEND;
      return false;
    }
    if (remaining>0) --remaining;
  }
  return true;
}
// Read end of block. If a SHA1 checksum is present, write 1 and the
// 20 byte checksum into sha1string, else write 0 in first byte.
// If sha1string is 0 then discard it.
void Decompresser::readSegmentEnd(char* sha1string) {
  assert(state==DATA || state==SEGEND);
  // Skip remaining data if any and get next byte
  int c=0;
  if (state==DATA) {
    c=dec.skip();
    decode_state=SKIP;
  }
  else if (state==SEGEND)
    c=dec.get();
  state=FILENAME;
  // Read checksum
  if (c==254) {
    if (sha1string) sha1string[0]=0;  // no checksum
  }
  else if (c==253) {
    if (sha1string) sha1string[0]=1;
    for (int i=1; i<=20; ++i) {
      c=dec.get();
      if (sha1string) sha1string[i]=c;
    }
  }
  else
    error("missing end of segment marker");
}
/////////////////////////// decompress() //////////////////////
// Decompress every segment of every block read from in, writing the
// data to out. Filenames, comments and checksums are read and discarded.
void decompress(Reader* in, Writer* out) {
  Decompresser d;
  d.setInput(in);
  d.setOutput(out);
  for (;;) {
    if (!d.findBlock()) break;  // memory use is not requested
    while (d.findFilename()) {  // filename discarded
      d.readComment();          // comment discarded
      d.decompress();           // whole segment
      d.readSegmentEnd();       // checksum discarded
    }
  }
}
/////////////////////////// Encoder ///////////////////////////
// Prepare for the start of a block: full coding range and a freshly
// initialized predictor. When the block is not modeled, low is reset
// to 0 and buf is sized to 64K elements instead.
void Encoder::init() {
  high=0xFFFFFFFF;
  low=1;
  pr.init();
  if (pr.isModeled()) return;
  low=0;
  buf.resize(1<<16);
}
// compress bit y having probability p/64K
void Encoder::encode(int y, int p) {
  assert(out);
  assert(p>=0 && p<65536);
  assert(y==0 || y==1);
  assert(high>low && low>0);
  U32 mid=low+U32(((high-low)*U64(U32(p)))>>16);  // split range
  assert(high>mid && mid>=low);
  if (y) high=mid; else low=mid+1; // pick half
  while ((high^low)<0x1000000) { // write identical leading bytes
    out->put(high>>24);  // same as low>>24
    high=high<<8|255;
    low=low<<8;
    low+=(low==0); // so we don't code 4 0 bytes in a row
  }
}
// compress byte c (0..255 or -1=EOS)
void Encoder::compress(int c) {
  assert(out);
  if (pr.isModeled()) {
    if (c==-1)
      encode(1, 0);
    else {
      assert(c>=0 && c<=255);
      encode(0, 0);
      for (int i=7; i>=0; --i) {
        int p=pr.predict()*2+1;
        assert(p>0 && p<65536);
        int y=c>>i&1;
        encode(y, p);
        pr.update(y);
      }
    }
  }
  else {
    if (low && (c<0 || low==buf.size())) {
      out->put((low>>24)&255);
      out->put((low>>16)&255);
      out->put((low>>8)&255);
      out->put(low&255);
      out->write(&buf[0], low);
      low=0;
    }
    if (c>=0) buf[low++]=c;
  }
}
//////////////////////////// Compiler /////////////////////////
// Component names as written in a config file. Entry 0 is an empty
// string, entries 1..9 are the names, and the explicit 0 ends the list
// (the remaining slots of the 256 entry array are null as well).
// NOTE(review): the order presumably matches the numeric component type
// codes (CONS, CM, ICM, ...) -- confirm against their declaration.
const char* compname[256]=
  {"","const","cm","icm","match","avg","mix2","mix","isse","sse",0};
// Opcodes
// Null terminated list of the tokens accepted in HCOMP/PCOMP code, 8 per
// row. Compiler::rtoken(const char* list[]) returns the index of the
// matching entry, so the position of a name in this table is its token
// value. Empty strings are placeholders for unused positions. The last
// two rows hold tokens beyond index 255 (post, pcomp, end, if, ...).
// NOTE(review): these presumably correspond to the POST, PCOMP, END, IF,
// ... constants used by compile_comp() -- confirm against their
// declaration.
const char* opcodelist[272]={
"error","a++",  "a--",  "a!",   "a=0",  "",     "",     "a=r",
"b<>a", "b++",  "b--",  "b!",   "b=0",  "",     "",     "b=r",
"c<>a", "c++",  "c--",  "c!",   "c=0",  "",     "",     "c=r",
"d<>a", "d++",  "d--",  "d!",   "d=0",  "",     "",     "d=r",
"*b<>a","*b++", "*b--", "*b!",  "*b=0", "",     "",     "jt",
"*c<>a","*c++", "*c--", "*c!",  "*c=0", "",     "",     "jf",
"*d<>a","*d++", "*d--", "*d!",  "*d=0", "",     "",     "r=a",
"halt", "out",  "",     "hash", "hashd","",     "",     "jmp",
"a=a",  "a=b",  "a=c",  "a=d",  "a=*b", "a=*c", "a=*d", "a=",
"b=a",  "b=b",  "b=c",  "b=d",  "b=*b", "b=*c", "b=*d", "b=",
"c=a",  "c=b",  "c=c",  "c=d",  "c=*b", "c=*c", "c=*d", "c=",
"d=a",  "d=b",  "d=c",  "d=d",  "d=*b", "d=*c", "d=*d", "d=",
"*b=a", "*b=b", "*b=c", "*b=d", "*b=*b","*b=*c","*b=*d","*b=",
"*c=a", "*c=b", "*c=c", "*c=d", "*c=*b","*c=*c","*c=*d","*c=",
"*d=a", "*d=b", "*d=c", "*d=d", "*d=*b","*d=*c","*d=*d","*d=",
"",     "",     "",     "",     "",     "",     "",     "",
"a+=a", "a+=b", "a+=c", "a+=d", "a+=*b","a+=*c","a+=*d","a+=",
"a-=a", "a-=b", "a-=c", "a-=d", "a-=*b","a-=*c","a-=*d","a-=",
"a*=a", "a*=b", "a*=c", "a*=d", "a*=*b","a*=*c","a*=*d","a*=",
"a/=a", "a/=b", "a/=c", "a/=d", "a/=*b","a/=*c","a/=*d","a/=",
"a%=a", "a%=b", "a%=c", "a%=d", "a%=*b","a%=*c","a%=*d","a%=",
"a&=a", "a&=b", "a&=c", "a&=d", "a&=*b","a&=*c","a&=*d","a&=",
"a&~a", "a&~b", "a&~c", "a&~d", "a&~*b","a&~*c","a&~*d","a&~",
"a|=a", "a|=b", "a|=c", "a|=d", "a|=*b","a|=*c","a|=*d","a|=",
"a^=a", "a^=b", "a^=c", "a^=d", "a^=*b","a^=*c","a^=*d","a^=",
"a<<=a","a<<=b","a<<=c","a<<=d","a<<=*b","a<<=*c","a<<=*d","a<<=",
"a>>=a","a>>=b","a>>=c","a>>=d","a>>=*b","a>>=*c","a>>=*d","a>>=",
"a==a", "a==b", "a==c", "a==d", "a==*b","a==*c","a==*d","a==",
"a<a",  "a<b",  "a<c",  "a<d",  "a<*b", "a<*c", "a<*d", "a<",
"a>a",  "a>b",  "a>c",  "a>d",  "a>*b", "a>*c", "a>*d", "a>",
"",     "",     "",     "",     "",     "",     "",     "",
"",     "",     "",     "",     "",     "",     "",     "lj",
"post", "pcomp","end",  "if",   "ifnot","else", "endif","do",
"while","until","forever","ifl","ifnotl","elsel",";",    0};
// Advance in to the start of the next token. Tokens are delimited by
// white space. Comments enclosed in ((nested) parentheses) are skipped.
// state tracks the scanner position: <0 inside the current token,
// 0 between tokens, >0 the comment nesting depth. Newlines are counted
// in line. Calls error() if the config ends before a token is found.
void Compiler::next() {
  assert(in);
  while (*in) {
    const char ch=*in;
    if (ch=='\n') ++line;
    if (ch=='(') {
      // opening a comment also ends a token in progress (-1 becomes 1)
      state+=1+(state<0);
    }
    else if (state>0 && ch==')') {
      --state;
    }
    else if (state<0 && ch<=' ') {
      state=0;  // white space ends the current token
    }
    else if (state==0 && ch>' ') {
      state=-1;  // first character of the next token
      break;
    }
    ++in;
  }
  if (!*in) error("unexpected end of config");
}
// Map 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
int tolower(int c) {
  if (c<'A' || c>'Z') return c;
  return c-'A'+'a';
}
// Return true if the token at in equals word, ignoring case. The token
// ends at white space or at '(' (the start of a comment).
bool Compiler::matchToken(const char* word) {
  const char* p=in;
  while (*p>' ' && *p!='(' && *word) {
    if (tolower(*p)!=tolower(*word)) return false;
    ++p;
    ++word;
  }
  if (*word) return false;  // token ended before word did
  return *p<=' ' || *p=='(';
}
// Print error message and exit
void Compiler::syntaxError(const char* msg, const char* expected) {
  Array<char> sbuf(128);  // error message to report
  char* s=&sbuf[0];
  strcat(s, "Config line ");
  for (int i=strlen(s), r=1000000; r; r/=10)  // append line number
    if (line/r) s[i++]='0'+line/r%10;
  strcat(s, " at ");
  for (int i=strlen(s); i<40 && *in>' '; ++i)  // append token found
    s[i]=*in++;
  strcat(s, ": ");
  strncat(s, msg, 40);  // append message
  if (expected) {
    strcat(s, ", expected: ");
    strncat(s, expected, 20);  // append expected token if any
  }
  error(s);
}
// Read the next token and return its index in the NULL terminated list.
// A token that is not in the list is reported with syntaxError().
int Compiler::rtoken(const char* list[]) {
  assert(in);
  assert(list);
  next();
  int i=0;
  while (list[i] && !matchToken(list[i]))
    ++i;
  if (list[i]) return i;
  syntaxError("unexpected");
  assert(0);
  return -1; // not reached
}
// Read the next token and require it to be s; anything else is
// reported with syntaxError().
void Compiler::rtoken(const char* s) {
  assert(s);
  next();
  if (matchToken(s)) return;
  syntaxError("expected", s);
}
// Read the next token as a number and return it. The value must lie in
// low...high, otherwise syntaxError() is called. A token of the form $N
// or $N+M (N in 1..9) yields args[N-1]+M, with args[N-1] counted as 0
// when no args were given.
int Compiler::rtoken(int low, int high) {
  next();
  int r=0;
  const bool isArg=in[0]=='$' && in[1]>='1' && in[1]<='9';
  const bool isLiteral=in[0]=='-' || (in[0]>='0' && in[0]<='9');
  if (isArg) {
    if (in[2]=='+') r=atoi(in+3);
    if (args) r+=args[in[1]-'1'];
  }
  else if (isLiteral) {
    r=atoi(in);
  }
  else {
    syntaxError("expected a number");
  }
  if (r<low) syntaxError("number too low");
  if (r>high) syntaxError("number too high");
  return r;
}
// Compile the body of an HCOMP or PCOMP section into z.header, appending
// at z.hend. Opcode tokens are read with rtoken(opcodelist) until POST,
// PCOMP or END is seen; that end token is returned after a terminating
// 0 byte has been appended.
// Structured keywords are lowered to jump instructions:
//   IF/IFNOT, ELSE, ENDIF       -> JF/JT/JMP with a 1 byte relative offset
//   IFL/IFNOTL, ELSEL           -> LJ with a 2 byte address
//   DO ... WHILE/UNTIL/FOREVER  -> backward JT/JF/JMP, or LJ if too far
// if_stack holds the location of jump operands still to be patched and
// do_stack holds loop start locations. Errors are reported through
// syntaxError().
int Compiler::compile_comp(ZPAQL& z) {
  int op=0;
  // Start of this section's code; LJ addresses are offsets from here.
  const int comp_begin=z.hend;
  while (true) {
    op=rtoken(opcodelist);
    if (op==POST || op==PCOMP || op==END) break;
    int operand=-1; // 0...255 if 2 bytes
    int operand2=-1;  // 0...255 if 3 bytes
    if (op==IF) {
      // IF: jump over the body when f is false; offset patched later
      op=JF;
      operand=0; // set later
      if_stack.push(z.hend+1); // save jump target location
    }
    else if (op==IFNOT) {
      // IFNOT: jump over the body when f is true; offset patched later
      op=JT;
      operand=0;
      if_stack.push(z.hend+1); // save jump target location
    }
    else if (op==IFL || op==IFNOTL) {  // long if
      // Emit a short conditional jump with offset 3 (the size of the LJ
      // that follows), then an LJ whose 2 byte address is patched later.
      if (op==IFL) z.header[z.hend++]=(JT);
      if (op==IFNOTL) z.header[z.hend++]=(JF);
      z.header[z.hend++]=(3);
      op=LJ;
      operand=operand2=0;
      if_stack.push(z.hend+1);  // location of the LJ address bytes
    }
    else if (op==ELSE || op==ELSEL) {
      // ELSE emits a jump past the else-part (patched at ENDIF) and
      // patches the pending IF jump to land just after that jump.
      if (op==ELSE) op=JMP, operand=0;
      if (op==ELSEL) op=LJ, operand=operand2=0;
      int a=if_stack.pop();  // conditional jump target location
      assert(a>comp_begin && a<int(z.hend));
      if (z.header[a-1]!=LJ) {  // IF, IFNOT
        assert(z.header[a-1]==JT || z.header[a-1]==JF || z.header[a-1]==JMP);
        // Relative to the byte after the IF operand; skips the 2 byte
        // JMP (or 3 byte LJ) that is about to be emitted at z.hend.
        int j=(z.hend-a)+1+(op==LJ); // offset at IF
        assert(j>=0);
        if (j>127) syntaxError("IF too big, try IFL, IFNOTL");
        z.header[a]=j;
      }
      else {  // IFL, IFNOTL
        // Address (from comp_begin) of the byte after the jump that is
        // about to be emitted, stored low byte first.
        int j=z.hend-comp_begin+2+(op==LJ);
        assert(j>=0);
        z.header[a]=j&255;
        z.header[a+1]=(j>>8)&255;
      }
      if_stack.push(z.hend+1);  // save JMP target location
    }
    else if (op==ENDIF) {
      // Patch the pending IF/ELSE jump to land here. ENDIF itself emits
      // no code (op is outside 0..255 and both operands stay -1).
      int a=if_stack.pop();  // jump target address
      assert(a>comp_begin && a<int(z.hend));
      int j=z.hend-a-1;  // jump offset
      assert(j>=0);
      if (z.header[a-1]!=LJ) {
        assert(z.header[a-1]==JT || z.header[a-1]==JF || z.header[a-1]==JMP);
        if (j>127) syntaxError("IF too big, try IFL, IFNOTL, ELSEL\n");
        z.header[a]=j;
      }
      else {
        assert(a+1<int(z.hend));
        j=z.hend-comp_begin;  // long jump: address from comp_begin
        z.header[a]=j&255;
        z.header[a+1]=(j>>8)&255;
      }
    }
    else if (op==DO) {
      // Remember the loop start; DO itself emits no code.
      do_stack.push(z.hend);
    }
    else if (op==WHILE || op==UNTIL || op==FOREVER) {
      int a=do_stack.pop();
      assert(a>=comp_begin && a<int(z.hend));
      // Offset back to the loop start, relative to the byte after the
      // 2 byte jump that would be emitted at z.hend.
      int j=a-z.hend-2;
      assert(j<=-2);
      if (j>=-127) {  // backward short jump
        if (op==WHILE) op=JT;
        if (op==UNTIL) op=JF;
        if (op==FOREVER) op=JMP;
        operand=j&255;
      }
      else {  // backward long jump
        j=a-comp_begin;
        assert(j>=0 && j<int(z.hend)-comp_begin);
        // The inverted condition with offset 3 skips the LJ, so the loop
        // is left when the condition fails. FOREVER needs no test.
        if (op==WHILE) {
          z.header[z.hend++]=(JF);
          z.header[z.hend++]=(3);
        }
        if (op==UNTIL) {
          z.header[z.hend++]=(JT);
          z.header[z.hend++]=(3);
        }
        op=LJ;
        operand=j&255;
        operand2=j>>8;
      }
    }
    else if ((op&7)==7) { // 2 byte operand, read N
      if (op==LJ) {
        // Explicit long jump: 16 bit address split into low/high bytes
        operand=rtoken(0, 65535);
        operand2=operand>>8;
        operand&=255;
      }
      else if (op==JT || op==JF || op==JMP) {
        // Explicit short jump: signed offset stored as one byte
        operand=rtoken(-128, 127);
        operand&=255;
      }
      else
        operand=rtoken(0, 255);
    }
    // Emit the opcode (if it is a real one) and any operand bytes
    if (op>=0 && op<=255)
      z.header[z.hend++]=(op);
    if (operand>=0)
      z.header[z.hend++]=(operand);
    if (operand2>=0)
      z.header[z.hend++]=(operand2);
    // Keep a 130 byte margin at the end of the buffer and keep the
    // COMP+HCOMP size within 16 bits.
    if (z.hend>=z.header.isize()-130 || z.hend-z.hbegin+z.cend-2>65535)
      syntaxError("program too big");
  }
  z.header[z.hend++]=(0); // END
  return op;
}
// Compile a configuration file. Store COMP/HCOMP section in hz.
// If there is a PCOMP section, store it in pz and write the PCOMP
// command to out2 (if not null). Replace "$1..$9+n" with args[0..8]+n.
//   in_   - configuration text to compile
//   args_ - values substituted for $1..$9, may be null (see rtoken)
//   hz_   - receives the header: size, hh hm ph pm n, components, HCOMP
//   pz_   - receives the PCOMP program, left cleared if there is none
//   out2_ - receives the characters of the PCOMP command up to ';'
// Errors are reported through syntaxError().
Compiler::Compiler(const char* in_, int* args_, ZPAQL& hz_, ZPAQL& pz_,
                   Writer* out2_): in(in_), args(args_), hz(hz_), pz(pz_),
                   out2(out2_), if_stack(1000), do_stack(1000) {
  line=1;
  state=0;
  hz.clear();
  pz.clear();
  hz.header.resize(68000);
  // Compile the COMP section of header
  rtoken("comp");
  hz.header[2]=rtoken(0, 255);  // hh
  hz.header[3]=rtoken(0, 255);  // hm
  hz.header[4]=rtoken(0, 255);  // ph
  hz.header[5]=rtoken(0, 255);  // pm
  const int n=hz.header[6]=rtoken(0, 255);  // n
  hz.cend=7;
  for (int i=0; i<n; ++i) {
    rtoken(i, i);  // components must be numbered 0..n-1 in order
    CompType type=CompType(rtoken(compname));
    hz.header[hz.cend++]=type;
    int clen=libzpaq::compsize[type&255];  // bytes including the type
    if (clen<1 || clen>10) syntaxError("invalid component");
    for (int j=1; j<clen; ++j)
      hz.header[hz.cend++]=rtoken(0, 255);  // component arguments
  }
  // Write the end-of-components marker explicitly instead of relying on
  // the buffer already holding 0 at this position.
  hz.header[hz.cend++]=0;  // end
  hz.hbegin=hz.hend=hz.cend+128;  // HCOMP code starts 128 bytes later
  // Compile HCOMP
  rtoken("hcomp");
  int op=compile_comp(hz);
  // Compute header size
  int hsize=(hz.cend-2)+hz.hend-hz.hbegin;
  hz.header[0]=hsize&255;
  hz.header[1]=hsize>>8;
  // Compile POST 0 END
  if (op==POST) {
    rtoken(0, 0);
    rtoken("end");
  }
  // Compile PCOMP pcomp_cmd ; program... END
  else if (op==PCOMP) {
    pz.header.resize(68000);
    pz.header[4]=hz.header[4];  // ph
    pz.header[5]=hz.header[5];  // pm
    pz.cend=8;
    pz.hbegin=pz.hend=pz.cend+128;
    // get pcomp_cmd ending with ";" (case sensitive)
    next();
    while (*in && *in!=';') {
      if (out2)
        out2->put(*in);
      ++in;
    }
    if (*in) ++in;  // skip the ';'
    // Compile PCOMP
    op=compile_comp(pz);
    int len=(pz.cend-2)+pz.hend-pz.hbegin;  // insert header size
    assert(len>=0);
    pz.header[0]=len&255;
    pz.header[1]=len>>8;
    if (op!=END)
      syntaxError("expected END");
  }
  else if (op!=END)
    syntaxError("expected END or POST 0 END or PCOMP cmd ; ... END");
}
///////////////////// Compressor //////////////////////
// Write the 13 byte start tag
// "\x37\x6B\x53\x74\xA0\x31\x83\xD3\x8C\xB2\x28\xB0\xD3"
// to the encoder's output. Only valid in the INIT state.
void Compressor::writeTag() {
  assert(state==INIT);
  static const unsigned char tag[13]={
    0x37, 0x6b, 0x53, 0x74, 0xa0, 0x31, 0x83,
    0xd3, 0x8c, 0xb2, 0x28, 0xb0, 0xd3};
  for (int i=0; i<13; ++i)
    enc.out->put(tag[i]);
}
void Compressor::startBlock(int level) {
  // Model 1 - min.cfg
  static const char models[]={
  26,0,1,2,0,0,2,3,16,8,19,0,0,96,4,28,
  59,10,59,112,25,10,59,10,59,112,56,0,
  // Model 2 - mid.cfg
  69,0,3,3,0,0,8,3,5,8,13,0,8,17,1,8,
  18,2,8,18,3,8,19,4,4,22,24,7,16,0,7,24,
  (char)-1,0,17,104,74,4,95,1,59,112,10,25,59,112,10,25,
  59,112,10,25,59,112,10,25,59,112,10,25,59,10,59,112,
  25,69,(char)-49,8,112,56,0,
  // Model 3 - max.cfg
  (char)-60,0,5,9,0,0,22,1,(char)-96,3,5,8,13,1,8,16,
  2,8,18,3,8,19,4,8,19,5,8,20,6,4,22,24,
  3,17,8,19,9,3,13,3,13,3,13,3,14,7,16,0,
  15,24,(char)-1,7,8,0,16,10,(char)-1,6,0,15,16,24,0,9,
  8,17,32,(char)-1,6,8,17,18,16,(char)-1,9,16,19,32,(char)-1,6,
  0,19,20,16,0,0,17,104,74,4,95,2,59,112,10,25,
  59,112,10,25,59,112,10,25,59,112,10,25,59,112,10,25,
  59,10,59,112,10,25,59,112,10,25,69,(char)-73,32,(char)-17,64,47,
  14,(char)-25,91,47,10,25,60,26,48,(char)-122,(char)-105,20,112,63,9,70,
  (char)-33,0,39,3,25,112,26,52,25,25,74,10,4,59,112,25,
  10,4,59,112,25,10,4,59,112,25,65,(char)-113,(char)-44,72,4,59,
  112,8,(char)-113,(char)-40,8,68,(char)-81,60,60,25,69,(char)-49,9,112,25,25,
  25,25,25,112,56,0,
  0,0}; // 0,0 = end of list
  if (level<1) error("compression level must be at least 1");
  const char* p=models;
  int i;
  for (i=1; i<level && toU16(p); ++i)
    p+=toU16(p)+2;
  if (toU16(p)<1) error("compression level too high");
  startBlock(p);
}
// Reader that returns successive bytes (as 0..255) from memory starting
// at the pointer given to the constructor. No end is tracked: the caller
// must not read more bytes than the buffer holds.
class MemoryReader: public Reader {
  const char* cursor;  // next byte to return
public:
  MemoryReader(const char* p_): cursor(p_) {}
  int get() {
    const int byte=*cursor&255;
    ++cursor;
    return byte;
  }
};
void Compressor::startBlock(const char* hcomp) {
  assert(state==INIT);
  MemoryReader m(hcomp);
  z.read(&m);
  pz.sha1=&sha1;
  assert(z.header.isize()>6);
  enc.out->put('z');
  enc.out->put('P');
  enc.out->put('Q');
  enc.out->put(1+(z.header[6]==0));  // level 1 or 2
  enc.out->put(1);
  z.write(enc.out, false);
  state=BLOCK1;
}
// Start a block whose header is compiled from the text in config.
// The Compiler temporary fills z and pz, substituting args for $1..$9
// and sending any PCOMP command to pcomp_cmd. Then writes the block
// prefix "zPQ", a level byte (2 if z.header[6] is 0, otherwise 1), a
// byte 1, and the header. State goes from INIT to BLOCK1.
void Compressor::startBlock(const char* config, int* args, Writer* pcomp_cmd) {
  assert(state==INIT);
  Compiler(config, args, z, pz, pcomp_cmd);
  pz.sha1=&sha1;
  assert(z.header.isize()>6);
  for (const char* magic="zPQ"; *magic; ++magic)
    enc.out->put(*magic);
  const int level=(z.header[6]==0) ? 2 : 1;  // level 1 or 2
  enc.out->put(level);
  enc.out->put(1);
  z.write(enc.out, false);
  state=BLOCK1;
}
// Write a segment header: a byte 1, the filename and the comment (each
// followed by a 0 byte; a null pointer is written as an empty string),
// and one more 0 byte. State goes BLOCK1 -> SEG1 or BLOCK2 -> SEG2.
void Compressor::startSegment(const char* filename, const char* comment) {
  assert(state==BLOCK1 || state==BLOCK2);
  enc.out->put(1);
  if (filename)
    for (; *filename; ++filename)
      enc.out->put(*filename);
  enc.out->put(0);
  if (comment)
    for (; *comment; ++comment)
      enc.out->put(*comment);
  enc.out->put(0);
  enc.out->put(0);
  if (state==BLOCK1) state=SEG1;
  else if (state==BLOCK2) state=SEG2;
}
// Initialize the encoder and, for the first segment of a block, encode
// the post-processor program. Does nothing if already in state SEG2.
//   pcomp - program bytes; if null the program is taken from
//           pz.header[pz.hbegin..pz.hend)
//   len   - program length; if 0 (and pcomp is not null) the length is
//           read from pcomp[0..1] and the program starts at pcomp+2
// A non-empty program is encoded as 1, the 2 byte length (low byte
// first), then the program bytes; an empty one as a single 0.
void Compressor::postProcess(const char* pcomp, int len) {
  if (state==SEG2) return;
  assert(state==SEG1);
  enc.init();
  if (pcomp) {
    if (len==0) {
      len=toU16(pcomp);
      pcomp+=2;
    }
  }
  else {
    len=pz.hend-pz.hbegin;
    if (len>0) {
      assert(pz.header.isize()>pz.hend);
      assert(pz.hbegin>=0);
      pcomp=(const char*)&pz.header[pz.hbegin];
    }
    assert(len>=0);
  }
  if (len<=0)
    enc.compress(0);
  else {
    enc.compress(1);
    enc.compress(len&255);
    enc.compress((len>>8)&255);
    for (const char* q=pcomp; q!=pcomp+len; ++q)
      enc.compress(*q&255);
    if (verify)
      pz.initp();
  }
  state=SEG2;
}
// Compress n bytes, or to EOF if n < 0
bool Compressor::compress(int n) {
  if (state==SEG1)
    postProcess();
  assert(state==SEG2);
  const int BUFSIZE=1<<14;
  char buf[BUFSIZE*2];  // input buffer alpine BUFSIZE*2
  while (n) {
    int nbuf=BUFSIZE;  // bytes read into buf
    if (n>=0 && n<nbuf) nbuf=n;
    int nr=in->read(buf, nbuf);
    if (nr<0 || nr>BUFSIZE || nr>nbuf) error("invalid read size");
    if (nr<=0) return false;
    if (n>=0) n-=nr;
    for (int i=0; i<nr; ++i) {
      int ch=U8(buf[i]);
      enc.compress(ch);
      if (verify) {
        if (pz.hend) pz.run(ch);
        else sha1.put(ch);
      }
    }
  }
  return true;
}
// End the current segment: encode the end marker (-1), flush the
// post-processor when verifying, write four 0 bytes, then either 253
// followed by the 20 bytes at sha1string, or 254 if sha1string is null.
// State becomes BLOCK2.
void Compressor::endSegment(const char* sha1string) {
  if (state==SEG1)
    postProcess();
  assert(state==SEG2);
  enc.compress(-1);
  if (verify && pz.hend) {
    pz.run(-1);
    pz.flush();
  }
  for (int pad=0; pad<4; ++pad)
    enc.out->put(0);
  if (!sha1string)
    enc.out->put(254);
  else {
    enc.out->put(253);
    for (const char* q=sha1string; q!=sha1string+20; ++q)
      enc.out->put(*q);
  }
  state=BLOCK2;
}
#ifdef DEBUG
// End the current segment like endSegment(), but take the checksum from
// the internal sha1 object. If verify is true, *size (when size is not
// null) receives sha1.usize() and the 20 byte result is copied to
// sha1result. The checksum is written (253 + 20 bytes) only if both
// verify and dosha1 are true; otherwise 254 is written.
// Returns sha1result if verify is true, else 0.
char* Compressor::endSegmentChecksum(int64_t* size, bool dosha1) {
  if (state==SEG1)
    postProcess();
  assert(state==SEG2);
  enc.compress(-1);
  if (verify && pz.hend) {
    pz.run(-1);
    pz.flush();
  }
  for (int pad=0; pad<4; ++pad)
    enc.out->put(0);
  if (verify) {
    if (size) *size=sha1.usize();
    memcpy(sha1result, sha1.result(), 20);
  }
  const bool writeHash=verify && dosha1;
  if (!writeHash)
    enc.out->put(254);
  else {
    enc.out->put(253);
    for (int i=0; i<20; ++i)
      enc.out->put(sha1result[i]);
  }
  state=BLOCK2;
  if (!verify) return 0;
  return sha1result;
}
#endif
// End the current block by writing the byte 255. Must be called in
// state BLOCK2 (after a segment has ended); returns the state to INIT.
void Compressor::endBlock() {
  assert(state==BLOCK2);
  enc.out->put(0xff);
  state=INIT;
}
//////////////////////// ZPAQL::assemble() ////////////////////
/*
assemble();
Assembles the ZPAQL code in hcomp[0..hlen-1] and stores x86-32 or x86-64
code in rcode[0..rcode_size-1]. Execution begins at rcode[0]. It will not
write beyond the end of rcode, but in any case it returns the number of
bytes that would have been written. It returns 0 in case of error.
The assembled code implements int run() and returns 0 if successful,
1 if the ZPAQL code executes an invalid instruction or jumps out of
bounds, or 2 if OUT throws bad_alloc, or 3 for other OUT exceptions.
A ZPAQL virtual machine has the following state. All values are
unsigned and initially 0:
  a, b, c, d: 32 bit registers (pointed to by their respective parameters)
  f: 1 bit flag register (pointed to)
  r[0..255]: 32 bit registers
  m[0..msize-1]: 8 bit registers, where msize is a power of 2
  h[0..hsize-1]: 32 bit registers, where hsize is a power of 2
  out: pointer to a Writer
  sha1: pointer to a SHA1
Generally a ZPAQL machine is used to compute contexts which are
placed in h. A second machine might post-process, and write its
output to out and sha1. In either case, a machine is called with
its input in a, representing a single byte (0..255) or
(for a postprocessor) EOF (0xffffffff). Execution returns after a
ZPAQL halt instruction.
ZPAQL instructions are 1 byte unless the last 3 bits are 1.
In this case, a second operand byte follows. Opcode 255 is
the only 3 byte instruction. They are organized:
  00dddxxx = unary opcode xxx on destination ddd (ddd < 111)
  00111xxx = special instruction xxx
  01dddsss = assignment: ddd = sss (ddd < 111)
  1xxxxsss = operation xxxx from sss to a
The meaning of sss and ddd are as follows:
  000 = a   (accumulator)
  001 = b
  010 = c
  011 = d
  100 = *b  (means m[b mod msize])
  101 = *c  (means m[c mod msize])
  110 = *d  (means h[d mod hsize])
  111 = n   (constant 0..255 in second byte of instruction)
For example, 01001110 assigns *d to b. The other instructions xxx
are as follows:
Group 00dddxxx where ddd < 111 and xxx is:
  000 = ddd<>a, swap with a (except 00000000 is an error, and swap
        with *b or *c leaves the high bits of a unchanged)
  001 = ddd++, increment
  010 = ddd--, decrement
  011 = ddd!, not (invert all bits)
  100 = ddd=0, clear (set all bits of ddd to 0)
  101 = not used (error)
  110 = not used
  111 = ddd=r n, assign from r[n] to ddd, n=0..255 in next opcode byte
Except:
  00100111 = jt n, jump if f is true (n = -128..127, relative to next opcode)
  00101111 = jf n, jump if f is false (n = -128..127)
  00110111 = r=a n, assign r[n] = a (n = 0..255)
Group 00111xxx where xxx is:
  000 = halt (return)
  001 = output a
  010 = not used
  011 = hash: a = (a + *b + 512) * 773
  100 = hashd: *d = (*d + a + 512) * 773
  101 = not used
  110 = not used
  111 = unconditional jump (n = -128 to 127, relative to next opcode)
Group 1xxxxsss where xxxx is:
  0000 = a += sss (add, subtract, multiply, divide sss to a)
  0001 = a -= sss
  0010 = a *= sss
  0011 = a /= sss (unsigned, except set a = 0 if sss is 0)
  0100 = a %= sss (remainder, except set a = 0 if sss is 0)
  0101 = a &= sss (bitwise AND)
  0110 = a &= ~sss (bitwise AND with complement of sss)
  0111 = a |= sss (bitwise OR)
  1000 = a ^= sss (bitwise XOR)
  1001 = a <<= (sss % 32) (left shift by low 5 bits of sss)
  1010 = a >>= (sss % 32) (unsigned, zero bits shifted in)
  1011 = a == sss (compare, set f = true if equal or false otherwise)
  1100 = a < sss (unsigned compare, result in f)
  1101 = a > sss (unsigned compare)
  1110 = not used
  1111 = not used except 11111111 is a 3 byte jump to the absolute address
         in the next 2 bytes in little-endian (LSB first) order.
assemble() translates ZPAQL to 32 bit x86 code to be executed by run().
Registers are mapped as follows:
  eax = source sss from *b, *c, *d or sometimes n
  ecx = pointer to destination *b, *c, *d, or spare
  edx = a
  ebx = f (1 for true, 0 for false)
  esp = stack pointer
  ebp = d
  esi = b
  edi = c
run() saves non-volatile registers (ebp, esi, edi, ebx) on the stack,
loads a, b, c, d, f, and executes the translated instructions.
A halt instruction saves a, b, c, d, f, pops the saved registers
and returns. Invalid instructions or jumps outside of the range
of the ZPAQL code call libzpaq::error().
In 64 bit mode, the following additional registers are used:
  r12 = h
  r14 = r
  r15 = m
*/
// Called by the generated code for OUT when the output buffer is full.
// Runs z->flush() and turns exceptions into a status code:
// 0 = success, 2 = std::bad_alloc, 3 = any other exception.
static int flush1(ZPAQL* z) {
  int status=0;
  try {
    z->flush();
  }
  catch(std::bad_alloc&) {
    status=2;
  }
  catch(...) {
    status=3;
  }
  return status;
}
// Return true if op is an undefined ZPAQL instruction:
// 0, 58, 120..127, 240..254, or any opcode below 64 whose low 3 bits
// are 5 or 6.
static bool iserr(int op) {
  if (op==0 || op==58) return true;
  if (op>=120 && op<=127) return true;
  if (op>=240 && op<=254) return true;
  if (op<64) {
    const int low3=op%8;
    return low3==5 || low3==6;
  }
  return false;
}
// Return length of ZPAQL instruction at hcomp[0]. Assume 0 padding at end.
//   3 for opcode 255 (long jump: opcode + 2 address bytes)
//   2 when the low 3 bits are 111 (opcode + 1 operand byte)
//   1 otherwise, except that a run of identical ++ or -- opcodes
//     (low 3 bits 001 or 010, opcode < 51) is counted as 1 instruction
//     whose length is the run length, capped at 127.
// Only ++ and -- may be merged: the assembler folds the run length into
// one add/sub, but emits a single instruction for everything else. The
// previous test (op%8-1)/2==0 was also true for op%8==0, because -1/2
// truncates to 0, so runs of identical swap opcodes were merged too and
// would have been translated as a single swap.
static int oplen(const U8* hcomp) {
  const int op=*hcomp;
  if (op==255) return 3;
  const int xxx=op%8;  // low 3 bits select the operation
  if (xxx==7) return 2;
  if (op<51 && (xxx==1 || xxx==2)) {  // ++ or -- opcode
    int i;
    for (i=1; i<127 && hcomp[i]==op; ++i);
    return i;
  }
  return 1;
}
// Write the low k bytes of x to rcode[o...], most significant byte
// first. o is always advanced by k, but bytes at index n or beyond are
// not stored, so o can be used to measure the space required.
static void put(U8* rcode, int n, int& o, U32 x, int k) {
  for (int shift=(k-1)*8; shift>=0; shift-=8, ++o)
    if (o<n) rcode[o]=(x>>shift)&255;
}
// Write the 4 bytes of x to rcode[o...], least significant byte first.
// o is always advanced by 4, but bytes at index n or beyond are not
// stored.
static void put4lsb(U8* rcode, int n, int& o, U32 x) {
  for (int shift=0; shift<32; shift+=8, ++o)
    if (o<n) rcode[o]=(x>>shift)&255;
}
// Write a 1-4 byte x86 opcode, with or without a 4 byte operand,
// to rcode[o...].
// These shorthands expand in a scope that provides rcode, rcode_size,
// the output index o, a U32 scratch variable t and (for put2l only)
// the pointer size S:
//   put1..put4(x)  1..4 bytes of x, MSB first (via put())
//   put5, put6     4 bytes of x followed by 1 or 2 bytes of y
//   put4r(x)       4 bytes of x, LSB first (via put4lsb())
//   puta(x)        x truncated to 32 bits, written LSB first
//   putNa(x,y)     N opcode bytes of x followed by puta(y)
//   put2l(x,y)     2 opcode bytes of x, then the low 32 bits of y and
//                  then y>>(S*4), each written LSB first (together the
//                  full 64 bit value when S is 8)
#define put1(x) put(rcode, rcode_size, o, (x), 1)
#define put2(x) put(rcode, rcode_size, o, (x), 2)
#define put3(x) put(rcode, rcode_size, o, (x), 3)
#define put4(x) put(rcode, rcode_size, o, (x), 4)
#define put5(x,y) put4(x), put1(y)
#define put6(x,y) put4(x), put2(y)
#define put4r(x) put4lsb(rcode, rcode_size, o, x)
#define puta(x) t=U32(size_t(x)), put4r(t)
#define put1a(x,y) put1(x), puta(y)
#define put2a(x,y) put2(x), puta(y)
#define put3a(x,y) put3(x), puta(y)
#define put4a(x,y) put4(x), puta(y)
// Disabled (unused) variant:
///#define put5a(x,y,z) put4(x), put1(y), puta(z)
#define put2l(x,y) put2(x), t=U32(size_t(y)), put4r(t), \
  t=U32(size_t(y)>>(S*4)), put4r(t)
// Assemble ZPAQL in the HCOMP section of header to rcode,
// but do not write beyond rcode_size. Return the number of
// bytes output or that would have been output.
// Execution starts at rcode[0]; the generated code returns 0 if
// successful, 1 in case of a ZPAQL execution error, or the nonzero
// result of flush1() if OUT fails.
int ZPAQL::assemble() {
  // x86? (not foolproof)
  const int S=sizeof(char*);      // 4 = x86, 8 = x86-64
  U32 t=0x12345678;
  if (*(char*)&t!=0x78 || (S!=4 && S!=8))
    error("JIT supported only for x86-32 and x86-64");
  const U8* hcomp=&header[hbegin];
  const int hlen=(hend-hbegin)+2;
  const int msize=m.size();
  const int hsize=h.size();
  static const int regcode[8]={2,6,7,5}; // a,b,c,d.. -> edx,esi,edi,ebp,eax..
  Array<int> it(hlen);            // hcomp -> rcode locations
  int done=0;  // number of instructions assembled (0..hlen)
  int o=5;  // rcode output index, reserve space for jmp
  // Code for the halt instruction (restore registers and return)
  const int halt=o;
  if (S==8) {
    put2l(0x48b9, &a);        // mov rcx, a
    put2(0x8911);             // mov [rcx], edx
    put2l(0x48b9, &b);        // mov rcx, b
    put2(0x8931);             // mov [rcx], esi
    put2l(0x48b9, &c);        // mov rcx, c
    put2(0x8939);             // mov [rcx], edi
    put2l(0x48b9, &d);        // mov rcx, d
    put2(0x8929);             // mov [rcx], ebp
    put2l(0x48b9, &f);        // mov rcx, f
    put2(0x8919);             // mov [rcx], ebx
    put4(0x4883c408);         // add rsp, 8
    put2(0x415f);             // pop r15
    put2(0x415e);             // pop r14
    put2(0x415d);             // pop r13
    put2(0x415c);             // pop r12
  }
  else {
    put2a(0x8915, &a);        // mov [a], edx
    put2a(0x8935, &b);        // mov [b], esi
    put2a(0x893d, &c);        // mov [c], edi
    put2a(0x892d, &d);        // mov [d], ebp
    put2a(0x891d, &f);        // mov [f], ebx
    put3(0x83c40c);           // add esp, 12
  }
  put1(0x5b);                 // pop ebx
  put1(0x5f);                 // pop edi
  put1(0x5e);                 // pop esi
  put1(0x5d);                 // pop ebp
  put1(0xc3);                 // ret
  // Code for the out instruction.
  // Store a=edx at outbuf[bufptr++]. If full, call flush1().
  const int outlabel=o;
  if (S==8) {
    put2l(0x48b8, &outbuf[0]);// mov rax, outbuf.p
    put2l(0x49ba, &bufptr);   // mov r10, &bufptr
    put3(0x418b0a);           // mov rcx, [r10]
    put3(0x881408);           // mov [rax+rcx], dl
    put2(0xffc1);             // inc rcx
    put3(0x41890a);           // mov [r10], ecx
    put2a(0x81f9, outbuf.size());  // cmp rcx, outbuf.size()
    put2(0x7403);             // jz L1
    put2(0x31c0);             // xor eax, eax
    put1(0xc3);               // ret
    put1(0x55);               // L1: push rbp ; call flush1(this)
    put1(0x57);               // push rdi
    put1(0x56);               // push rsi
    put1(0x52);               // push rdx
    put1(0x51);               // push rcx
    put3(0x4889e5);           // mov rbp, rsp
    put4(0x4883c570);         // add rbp, 112
#if defined(unix) && !defined(__CYGWIN__)
    put2l(0x48bf, this);      // mov rdi, this
#else  // Windows
    put2l(0x48b9, this);      // mov rcx, this
#endif
    put2l(0x49bb, &flush1);   // mov r11, &flush1
    put3(0x41ffd3);           // call r11
    put1(0x59);               // pop rcx
    put1(0x5a);               // pop rdx
    put1(0x5e);               // pop rsi
    put1(0x5f);               // pop rdi
    put1(0x5d);               // pop rbp
  }
  else {
    put1a(0xb8, &outbuf[0]);  // mov eax, outbuf.p
    put2a(0x8b0d, &bufptr);   // mov ecx, [bufptr]
    put3(0x881408);           // mov [eax+ecx], dl
    put2(0xffc1);             // inc ecx
    put2a(0x890d, &bufptr);   // mov [bufptr], ecx
    put2a(0x81f9, outbuf.size());  // cmp ecx, outbuf.size()
    put2(0x7403);             // jz L1
    put2(0x31c0);             // xor eax, eax
    put1(0xc3);               // ret
    put3(0x83ec0c);           // L1: sub esp, 12
    put4(0x89542404);         // mov [esp+4], edx
    put3a(0xc70424, this);    // mov [esp], this
    put1a(0xb8, &flush1);     // mov eax, &flush1
    put2(0xffd0);             // call eax
    put4(0x8b542404);         // mov edx, [esp+4]
    put3(0x83c40c);           // add esp, 12
  }
  put1(0xc3);               // ret
  // Set it[i]=1 for each ZPAQL instruction reachable from the previous
  // instruction + 2 if reachable by a jump (or 3 if both).
  it[0]=2;
  assert(hlen>0 && hcomp[hlen-1]==0);  // ends with error
  do {
    done=0;
    const int NONE=0x80000000;
    for (int i=0; i<hlen; ++i) {
      int op=hcomp[i];
      if (it[i]) {
        int next1=i+oplen(hcomp+i), next2=NONE; // next and jump targets
        if (iserr(op)) next1=NONE;  // error
        if (op==56) next1=NONE, next2=0;  // halt
        if (op==255) next1=NONE, next2=hcomp[i+1]+256*hcomp[i+2]; // lj
        if (op==39||op==47||op==63)next2=i+2+(hcomp[i+1]<<24>>24);// jt,jf,jmp
        if (op==63) next1=NONE;  // jmp
        if ((next2<0 || next2>=hlen) && next2!=NONE) next2=hlen-1; // error
        if (next1>=0 && next1<hlen && !(it[next1]&1)) it[next1]|=1, ++done;
        if (next2>=0 && next2<hlen && !(it[next2]&2)) it[next2]|=2, ++done;
      }
    }
  } while (done>0);
  // Set it[i] bits 2-3 to 4, 8, or 12 if a comparison
  //  (==, <, > respectively) does not need to save the result in f,
  // or if a conditional jump (jt, jf) does not need to read f.
  // This is true if a comparison is followed directly by a jt/jf,
  // the jt/jf is not a jump target, the byte before is not a jump
  // target (for a 2 byte comparison), and for the comparison instruction
  // if both paths after the jt/jf lead to another comparison or error
  // before another jt/jf. At most hlen steps are traced because after
  // that it must be an infinite loop.
  for (int i=0; i<hlen; ++i) {
    const int op1=hcomp[i]; // 216..239 = comparison
    const int i2=i+1+(op1%8==7);  // address of next instruction
    const int op2=hcomp[i2];  // 39,47 = jt,jf
    if (it[i] && op1>=216 && op1<240 && (op2==39 || op2==47)
        && it[i2]==1 && (i2==i+1 || it[i+1]==0)) {
      int code=(op1-208)/8*4; // 4,8,12 is ==,<,>
      it[i2]+=code;  // OK to test CF, ZF instead of f
      for (int j=0; j<2 && code; ++j) {  // trace each path from i2
        int k=i2+2; // branch not taken
        if (j==1) k=i2+2+(hcomp[i2+1]<<24>>24);  // branch taken
        for (int l=0; l<hlen && code; ++l) {  // trace at most hlen steps
          if (k<0 || k>=hlen) break;  // out of bounds, pass
          const int op=hcomp[k];
          if (op==39 || op==47) code=0;  // jt,jf, fail
          else if (op>=216 && op<240) break;  // ==,<,>, pass
          else if (iserr(op)) break;  // error, pass
          else if (op==255) k=hcomp[k+1]+256*hcomp[k+2]; // lj
          else if (op==63) k=k+2+(hcomp[k+1]<<24>>24);  // jmp
          else if (op==56) k=0;  // halt
          else k=k+1+(op%8==7);  // ordinary instruction
        }
      }
      it[i]+=code;  // if > 0 then OK to not save flags in f (bl)
    }
  }
  // Start of run(): Save x86 and load ZPAQL registers
  const int start=o;
  assert(start>=16);
  put1(0x55);          // push ebp/rbp
  put1(0x56);          // push esi/rsi
  put1(0x57);          // push edi/rdi
  put1(0x53);          // push ebx/rbx
  if (S==8) {
    put2(0x4154);      // push r12
    put2(0x4155);      // push r13
    put2(0x4156);      // push r14
    put2(0x4157);      // push r15
    put4(0x4883ec08);  // sub rsp, 8
    put2l(0x48b8, &a); // mov rax, a
    put2(0x8b10);      // mov edx, [rax]
    put2l(0x48b8, &b); // mov rax, b
    put2(0x8b30);      // mov esi, [rax]
    put2l(0x48b8, &c); // mov rax, c
    put2(0x8b38);      // mov edi, [rax]
    put2l(0x48b8, &d); // mov rax, d
    put2(0x8b28);      // mov ebp, [rax]
    put2l(0x48b8, &f); // mov rax, f
    put2(0x8b18);      // mov ebx, [rax]
    put2l(0x49bc, &h[0]);   // mov r12, h
    put2l(0x49bd, &outbuf[0]); // mov r13, outbuf.p
    put2l(0x49be, &r[0]);   // mov r14, r
    put2l(0x49bf, &m[0]);   // mov r15, m
  }
  else {
    put3(0x83ec0c);    // sub esp, 12
    put2a(0x8b15, &a); // mov edx, [a]
    put2a(0x8b35, &b); // mov esi, [b]
    put2a(0x8b3d, &c); // mov edi, [c]
    put2a(0x8b2d, &d); // mov ebp, [d]
    put2a(0x8b1d, &f); // mov ebx, [f]
  }
  // Assemble in multiple passes until every byte of hcomp has a translation
  for (int istart=0; istart<hlen; ++istart) {
    int inc=0;
    for (int i=istart; i<hlen && it[i]; i+=inc) {
      const int code=it[i];
      inc=oplen(hcomp+i);
      // If already assembled, then assemble a jump to it
      U32 t;
      assert(it.isize()>i);
      assert(i>=0 && i<hlen);
      if (code>=16) {
        if (i>istart) {
          int a=code-o;
          if (a>-120 && a<120)
            put2(0xeb00+((a-2)&255)); // jmp short o
          else
            put1a(0xe9, a-5);  // jmp near o
        }
        break;
      }
      // Else assemble the instruction at hcomp[i] to rcode[o]
      else {
        assert(i>=0 && i<it.isize());
        assert(it[i]>0 && it[i]<16);
        assert(o>=16);
        it[i]=o;
        ++done;
        const int op=hcomp[i];
        const int arg=hcomp[i+1]+((op==255)?256*hcomp[i+2]:0);
        const int ddd=op/8%8;
        const int sss=op%8;
        // error instruction: return 1
        if (iserr(op)) {
          put1a(0xb8, 1);         // mov eax, 1
          put1a(0xe9, halt-o-4);  // jmp near halt
          continue;
        }
        // Load source *b, *c, *d, or hash (*b) into eax except:
        // {a,b,c,d}=*d, a{+,-,*,&,|,^,=,==,>,>}=*d: load address to eax
        // {a,b,c,d}={*b,*c}: load source into ddd
        if (op==59 || (op>=64 && op<240 && op%8>=4 && op%8<7)) {
          put2(0x89c0+8*regcode[sss-3+(op==59)]);  // mov eax, {esi,edi,ebp}
          const int sz=(sss==6?hsize:msize)-1;
          if (sz>=128) put1a(0x25, sz);            // and eax, dword msize-1
          else put3(0x83e000+sz);                  // and eax, byte msize-1
          const int move=(op>=64 && op<112); // = or else ddd is eax
          if (sss<6) { // ddd={a,b,c,d,*b,*c}
            if (S==8) put5(0x410fb604+8*move*regcode[ddd],0x07);
                                                   // movzx ddd, byte [r15+rax]
            else put3a(0x0fb680+8*move*regcode[ddd], &m[0]);
                                                   // movzx ddd, byte [m+eax]
          }
          else if ((0x06587000>>(op/8))&1) {// {*b,*c,*d,a/,a%,a&~,a<<,a>>}=*d
            if (S==8) put4(0x418b0484);            // mov eax, [r12+rax*4]
            else put3a(0x8b0485, &h[0]);           // mov eax, [h+eax*4]
          }
        }
        // Load destination address *b, *c, *d or hashd (*d) into ecx
        if ((op>=32 && op<56 && op%8<5) || (op>=96 && op<120) || op==60) {
          put2(0x89c1+8*regcode[op/8%8-3-(op==60)]);// mov ecx,{esi,edi,ebp}
          const int sz=(ddd==6||op==60?hsize:msize)-1;
          if (sz>=128) put2a(0x81e1, sz);   // and ecx, dword sz
          else put3(0x83e100+sz);           // and ecx, byte sz
          if (op/8%8==6 || op==60) { // *d
            if (S==8) put4(0x498d0c8c);     // lea rcx, [r12+rcx*4]
            else put3a(0x8d0c8d, &h[0]);    // lea ecx, [ecx*4+h]
          }
          else { // *b, *c
            if (S==8) put4(0x498d0c0f);     // lea rcx, [r15+rcx]
            else put2a(0x8d89, &m[0]);      // lea ecx, [ecx+h]
          }
        }
        // Translate by opcode
        switch((op/8)&31) {
          case 0:  // ddd = a
          case 1:  // ddd = b
          case 2:  // ddd = c
          case 3:  // ddd = d
            switch(sss) {
              case 0:  // ddd<>a (swap)
                put2(0x87d0+regcode[ddd]);   // xchg edx, ddd
                break;
              case 1:  // ddd++
                put3(0x83c000+256*regcode[ddd]+inc); // add ddd, inc
                break;
              case 2:  // ddd--
                put3(0x83e800+256*regcode[ddd]+inc); // sub ddd, inc
                break;
              case 3:  // ddd!
                put2(0xf7d0+regcode[ddd]);   // not ddd
                break;
              case 4:  // ddd=0
                put2(0x31c0+9*regcode[ddd]); // xor ddd,ddd
                break;
              case 7:  // ddd=r n
                if (S==8)
                  put3a(0x418b86+8*regcode[ddd], arg*4); // mov ddd, [r14+n*4]
                else
                  put2a(0x8b05+8*regcode[ddd], (&r[arg]));//mov ddd, [r+n]
                break;
            }
            break;
          case 4:  // ddd = *b
          case 5:  // ddd = *c
            switch(sss) {
              case 0:  // ddd<>a (swap)
                put2(0x8611);                // xchg dl, [ecx]
                break;
              case 1:  // ddd++
                put3(0x800100+inc);          // add byte [ecx], inc
                break;
              case 2:  // ddd--
                put3(0x802900+inc);          // sub byte [ecx], inc
                break;
              case 3:  // ddd!
                put2(0xf611);                // not byte [ecx]
                break;
              case 4:  // ddd=0
                put2(0x31c0);                // xor eax, eax
                put2(0x8801);                // mov [ecx], al
                break;
              case 7:  // jt, jf
              {
                assert(code>=0 && code<16);
                static const unsigned char jtab[2][4]={{5,4,2,7},{4,5,3,6}};
                               // jnz,je,jb,ja, jz,jne,jae,jbe
                if (code<4) put2(0x84db);    // test bl, bl
                if (arg>=128 && arg-257-i>=0 && o-it[arg-257-i]<120)
                  put2(0x7000+256*jtab[op==47][code/4]); // jx short 0
                else
                  put2a(0x0f80+jtab[op==47][code/4], 0); // jx near 0
                break;
              }
            }
            break;
          case 6:  // ddd = *d
            switch(sss) {
              case 0:  // ddd<>a (swap)
                put2(0x8711);             // xchg edx, [ecx]
                break;
              case 1:  // ddd++
                put3(0x830100+inc);       // add dword [ecx], inc
                break;
              case 2:  // ddd--
                put3(0x832900+inc);       // sub dword [ecx], inc
                break;
              case 3:  // ddd!
                put2(0xf711);             // not dword [ecx]
                break;
              case 4:  // ddd=0
                put2(0x31c0);             // xor eax, eax
                put2(0x8901);             // mov [ecx], eax
                break;
              case 7:  // ddd=r n
                if (S==8)
                  put3a(0x418996, arg*4); // mov [r14+n*4], edx
                else
                  put2a(0x8915, &r[arg]); // mov [r+n], edx
                break;
            }
            break;
          case 7:  // special
            switch(op) {
              case 56: // halt
                put2(0x31c0);             // xor eax, eax  ; return 0
                put1a(0xe9, halt-o-4);    // jmp near halt
                break;
              case 57:  // out
                put1a(0xe8, outlabel-o-4);// call outlabel
                put3(0x83f800);           // cmp eax, 0  ; returned error code
                put2(0x7405);             // je L1:
                put1a(0xe9, halt-o-4);    // jmp near halt ; L1:
                break;
              case 59:  // hash: a = (a + *b + 512) * 773
                put3a(0x8d8410, 512);     // lea edx, [eax+edx+512]
                put2a(0x69d0, 773);       // imul edx, eax, 773
                break;
              case 60:  // hashd: *d = (*d + a + 512) * 773
                put2(0x8b01);             // mov eax, [ecx]
                put3a(0x8d8410, 512);     // lea eax, [eax+edx+512]
                put2a(0x69c0, 773);       // imul eax, eax, 773
                put2(0x8901);             // mov [ecx], eax
                break;
              case 63:  // jmp
                put1a(0xe9, 0);           // jmp near 0 (fill in target later)
                break;
            }
            break;
          case 8:   // a=
          case 9:   // b=
          case 10:  // c=
          case 11:  // d=
            if (sss==7)  // n
              put1a(0xb8+regcode[ddd], arg);         // mov ddd, n
            else if (sss==6) { // *d
              if (S==8)
                put4(0x418b0484+(regcode[ddd]<<11)); // mov ddd, [r12+rax*4]
              else
                put3a(0x8b0485+(regcode[ddd]<<11),&h[0]);// mov ddd, [h+eax*4]
            }
            else if (sss<4) // a, b, c, d
              put2(0x89c0+regcode[ddd]+8*regcode[sss]);// mov ddd,sss
            break;
          case 12:  // *b=
          case 13:  // *c=
            if (sss==7) put3(0xc60100+arg);          // mov byte [ecx], n
            else if (sss==0) put2(0x8811);           // mov byte [ecx], dl
            else {
              if (sss<4) put2(0x89c0+8*regcode[sss]);// mov eax, sss
              put2(0x8801);                          // mov byte [ecx], al
            }
            break;
          case 14:  // *d=
            if (sss<7) put2(0x8901+8*regcode[sss]);  // mov [ecx], sss
            else put2a(0xc701, arg);                 // mov dword [ecx], n
            break;
          case 15: break; // not used
          case 16:  // a+=
            if (sss==6) {
              if (S==8) put4(0x41031484);            // add edx, [r12+rax*4]
              else put3a(0x031485, &h[0]);           // add edx, [h+eax*4]
            }
            else if (sss<7) put2(0x01c2+8*regcode[sss]);// add edx, sss
            else if (arg>=128) put2a(0x81c2, arg);   // add edx, n
            else put3(0x83c200+arg);                 // add edx, byte n
            break;
          case 17:  // a-=
            if (sss==6) {
              if (S==8) put4(0x412b1484);            // sub edx, [r12+rax*4]
              else put3a(0x2b1485, &h[0]);           // sub edx, [h+eax*4]
            }
            else if (sss<7) put2(0x29c2+8*regcode[sss]);// sub edx, sss
            else if (arg>=128) put2a(0x81ea, arg);   // sub edx, n
            else put3(0x83ea00+arg);                 // sub edx, byte n
            break;
          case 18:  // a*=
            if (sss==6) {
              if (S==8) put5(0x410faf14,0x84);       // imul edx, [r12+rax*4]
              else put4a(0x0faf1485, &h[0]);         // imul edx, [h+eax*4]
            }
            else if (sss<7) put3(0x0fafd0+regcode[sss]);// imul edx, sss
            else if (arg>=128) put2a(0x69d2, arg);   // imul edx, n
            else put3(0x6bd200+arg);                 // imul edx, byte n
            break;
          case 19:  // a/=
          case 20:  // a%=
            if (sss<7) put2(0x89c1+8*regcode[sss]);  // mov ecx, sss
            else put1a(0xb9, arg);                   // mov ecx, n
            put2(0x85c9);                            // test ecx, ecx
            put3(0x0f44d1);                          // cmovz edx, ecx
            put2(0x7408-2*(op/8==20));               // jz (over rest)
            put2(0x89d0);                            // mov eax, edx
            put2(0x31d2);                            // xor edx, edx
            put2(0xf7f1);                            // div ecx
            if (op/8==19) put2(0x89c2);              // mov edx, eax
            break;
          case 21:  // a&=
            if (sss==6) {
              if (S==8) put4(0x41231484);            // and edx, [r12+rax*4]
              else put3a(0x231485, &h[0]);           // and edx, [h+eax*4]
            }
            else if (sss<7) put2(0x21c2+8*regcode[sss]);// and edx, sss
            else if (arg>=128) put2a(0x81e2, arg);   // and edx, n
            else put3(0x83e200+arg);                 // and edx, byte n
            break;
          case 22:  // a&~
            if (sss==7) {
              if (arg<128) put3(0x83e200+(~arg&255));// and edx, byte ~n
              else put2a(0x81e2, ~arg);              // and edx, ~n
            }
            else {
              if (sss<4) put2(0x89c0+8*regcode[sss]);// mov eax, sss
              put2(0xf7d0);                          // not eax
              put2(0x21c2);                          // and edx, eax
            }
            break;
          case 23:  // a|=
            if (sss==6) {
              if (S==8) put4(0x410b1484);            // or edx, [r12+rax*4]
              else put3a(0x0b1485, &h[0]);           // or edx, [h+eax*4]
            }
            else if (sss<7) put2(0x09c2+8*regcode[sss]);// or edx, sss
            else if (arg>=128) put2a(0x81ca, arg);   // or edx, n
            else put3(0x83ca00+arg);                 // or edx, byte n
            break;
          case 24:  // a^=
            if (sss==6) {
              if (S==8) put4(0x41331484);            // xor edx, [r12+rax*4]
              else put3a(0x331485, &h[0]);           // xor edx, [h+eax*4]
            }
            else if (sss<7) put2(0x31c2+8*regcode[sss]);// xor edx, sss
            else if (arg>=128) put2a(0x81f2, arg);   // xor edx, byte n
            else put3(0x83f200+arg);                 // xor edx, n
            break;
          case 25:  // a<<=
          case 26:  // a>>=
            if (sss==7)  // sss = n
              put3(0xc1e200+8*256*(op/8==26)+arg);   // shl/shr n
            else {
              put2(0x89c1+8*regcode[sss]);           // mov ecx, sss
              put2(0xd3e2+8*(op/8==26));             // shl/shr edx, cl
            }
            break;
          case 27:  // a==
          case 28:  // a<
          case 29:  // a>
            if (sss==6) {
              if (S==8) put4(0x413b1484);            // cmp edx, [r12+rax*4]
              else put3a(0x3b1485, &h[0]);           // cmp edx, [h+eax*4]
            }
            else if (sss==7)  // sss = n
              put2a(0x81fa, arg);                    // cmp edx, dword n
            else
              put2(0x39c2+8*regcode[sss]);           // cmp edx, sss
            if (code<4) {
              if (op/8==27) put3(0x0f94c3);          // setz bl
              if (op/8==28) put3(0x0f92c3);          // setc bl
              if (op/8==29) put3(0x0f97c3);          // seta bl
            }
            break;
          case 30:  // not used
          case 31:  // 255 = lj
            if (op==255) put1a(0xe9, 0);             // jmp near
            break;
        }
      }
    }
  }
  // Finish first pass
  const int rsize=o;
  if (o>rcode_size) return rsize;
  // Fill in jump addresses (second pass)
  for (int i=0; i<hlen; ++i) {
    if (it[i]<16) continue;
    int op=hcomp[i];
    if (op==39 || op==47 || op==63 || op==255) {  // jt, jf, jmp, lj
      int target=hcomp[i+1];
      if (op==255) target+=hcomp[i+2]*256;  // lj
      else {
        if (target>=128) target-=256;
        target+=i+2;
      }
      if (target<0 || target>=hlen) target=hlen-1;  // runtime ZPAQL error
      o=it[i];
      assert(o>=16 && o<rcode_size);
      if ((op==39 || op==47) && rcode[o]==0x84) o+=2;  // jt, jf -> skip test
      assert(o>=16 && o<rcode_size);
      if (rcode[o]==0x0f) ++o;  // first byte of jz near, jnz near
      assert(o<rcode_size);
      op=rcode[o++];  // x86 opcode
      target=it[target]-o;
      if ((op>=0x72 && op<0x78) || op==0xeb) {  // jx, jmp short
        --target;
        if (target<-128 || target>127)
          error("Cannot code x86 short jump");
        assert(o<rcode_size);
        rcode[o]=target&255;
      }
      else if ((op>=0x82 && op<0x88) || op==0xe9) // jx, jmp near
      {
        target-=4;
        puta(target);
      }
      else assert(false);  // not a x86 jump
    }
  }
  // Jump to start
  o=0;
  put1a(0xe9, start-5);  // jmp near start
  return rsize;
}
//////////////////////// Predictor::assemble_p() /////////////////////
// Assemble the component descriptions in the COMP section of z.header
// (n=header[6] components starting at header[7]) to pcode and return the
// number of bytes of x86 or x86-64 code written, or that would be written
// if pcode were large enough. The code for predict() begins at pr.pcode[0]
// and update() at pr.pcode[5], both as jmp instructions.
// The assembled code is equivalent to int predict(Predictor*)
// and void update(Predictor*, int y). The Predictor address is placed in
// edi/rdi. The update bit y is placed in ebp/rbp.
int Predictor::assemble_p() {
  Predictor& pr=*this;
  U8* rcode=pr.pcode;         // x86 output array
  int rcode_size=pcode_size;  // output size
  int o=0;                    // output index in pcode
  const int S=sizeof(char*);  // 4 or 8
  U8* hcomp=&pr.z.header[0];  // The code to translate
#define off(x)  ((char*)&(pr.x)-(char*)&pr)
#define offc(x) ((char*)&(pr.comp[i].x)-(char*)&pr)
  // test for little-endian (probably x86)
  U32 t=0x12345678;
  if (*(char*)&t!=0x78 || (S!=4 && S!=8))
    error("JIT supported only for x86-32 and x86-64");
  // Initialize for predict(). Put predictor address in edi/rdi
  put1a(0xe9, 5);             // jmp predict
  put1a(0, 0x90909000);       // reserve space for jmp update
  put1(0x53);                 // push ebx/rbx
  put1(0x55);                 // push ebp/rbp
  put1(0x56);                 // push esi/rsi
  put1(0x57);                 // push edi/rdi
  if (S==4)
    put4(0x8b7c2414);         // mov edi,[esp+0x14] ; pr
  else {
#if !defined(unix) || defined(__CYGWIN__)
    put3(0x4889cf);           // mov rdi, rcx (1st arg in Win64)
#endif
  }
  // Code predict() for each component
  const int n=hcomp[6];  // number of components
  U8* cp=hcomp+7;
  for (int i=0; i<n; ++i, cp+=compsize[cp[0]]) {
    if (cp-hcomp>=pr.z.cend) error("comp too big");
    if (cp[0]<1 || cp[0]>9) error("invalid component");
    assert(compsize[cp[0]]>0 && compsize[cp[0]]<8);
    switch (cp[0]) {
      case CONS:  // c
        break;
      case CM:  // sizebits limit
        // Component& cr=comp[i];
        // cr.cxt=h[i]^hmap4;
        // p[i]=stretch(cr.cm(cr.cxt)>>17);
        put2a(0x8b87, off(h[i]));              // mov eax, [edi+&h[i]]
        put2a(0x3387, off(hmap4));             // xor eax, [edi+&hmap4]
        put1a(0x25, (1<<cp[1])-1);             // and eax, size-1
        put2a(0x8987, offc(cxt));              // mov [edi+cxt], eax
        if (S==8) put1(0x48);                  // rex.w (esi->rsi)
        put2a(0x8bb7, offc(cm));               // mov esi, [edi+&cm]
        put3(0x8b0486);                        // mov eax, [esi+eax*4]
        put3(0xc1e811);                        // shr eax, 17
        put4a(0x0fbf8447, off(stretcht));      // movsx eax,word[edi+eax*2+..]
        put2a(0x8987, off(p[i]));              // mov [edi+&p[i]], eax
        break;
      case ISSE:  // sizebits j -- c=hi, cxt=bh
        // assert((hmap4&15)>0);
        // if (c8==1 || (c8&0xf0)==16)
        //   cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8);
        // cr.cxt=cr.ht[cr.c+(hmap4&15)];  // bit history
        // int *wt=(int*)&cr.cm[cr.cxt*2];
        // p[i]=clamp2k((wt[0]*p[cp[2]]+wt[1]*64)>>16);
      case ICM: // sizebits
        // assert((hmap4&15)>0);
        // if (c8==1 || (c8&0xf0)==16) cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8);
        // cr.cxt=cr.ht[cr.c+(hmap4&15)];
        // p[i]=stretch(cr.cm(cr.cxt)>>8);
        //
        // Find cxt row in hash table ht. ht has rows of 16 indexed by the low
        // sizebits of cxt with element 0 having the next higher 8 bits for
        // collision detection. If not found after 3 adjacent tries, replace
        // row with lowest element 1 as priority. Return index of row.
        //
        // size_t Predictor::find(Array<U8>& ht, int sizebits, U32 cxt) {
        //  assert(ht.size()==size_t(16)<<sizebits);
        //  int chk=cxt>>sizebits&255;
        //  size_t h0=(cxt*16)&(ht.size()-16);
        //  if (ht[h0]==chk) return h0;
        //  size_t h1=h0^16;
        //  if (ht[h1]==chk) return h1;
        //  size_t h2=h0^32;
        //  if (ht[h2]==chk) return h2;
        //  if (ht[h0+1]<=ht[h1+1] && ht[h0+1]<=ht[h2+1])
        //    return memset(&ht[h0], 0, 16), ht[h0]=chk, h0;
        //  else if (ht[h1+1]<ht[h2+1])
        //    return memset(&ht[h1], 0, 16), ht[h1]=chk, h1;
        //  else
        //    return memset(&ht[h2], 0, 16), ht[h2]=chk, h2;
        // }
        if (S==8) put1(0x48);                  // rex.w
        put2a(0x8bb7, offc(ht));               // mov esi, [edi+&ht]
        put2(0x8b07);                          // mov eax, edi ; c8
        put2(0x89c1);                          // mov ecx, eax ; c8
        put3(0x83f801);                        // cmp eax, 1
        put2(0x740a);                          // je L1
        put1a(0x25, 240);                      // and eax, 0xf0
        put3(0x83f810);                        // cmp eax, 16
        put2(0x7576);                          // jne L2 ; skip find()
           // L1: ; find cxt in ht, return index in eax
        put3(0xc1e104);                        // shl ecx, 4
        put2a(0x038f, off(h[i]));              // add [edi+&h[i]]
        put2(0x89c8);                          // mov eax, ecx ; cxt
        put3(0xc1e902+cp[1]);                  // shr ecx, sizebits+2
        put2a(0x81e1, 255);                    // and eax, 255 ; chk
        put3(0xc1e004);                        // shl eax, 4
        put1a(0x25, (64<<cp[1])-16);           // and eax, ht.size()-16 = h0
        put3(0x3a0c06);                        // cmp cl, [esi+eax] ; ht[h0]
        put2(0x744d);                          // je L3 ; match h0
        put3(0x83f010);                        // xor eax, 16 ; h1
        put3(0x3a0c06);                        // cmp cl, [esi+eax]
        put2(0x7445);                          // je L3 ; match h1
        put3(0x83f030);                        // xor eax, 48 ; h2
        put3(0x3a0c06);                        // cmp cl, [esi+eax]
        put2(0x743d);                          // je L3 ; match h2
          // No checksum match, so replace the lowest priority among h0,h1,h2
        put3(0x83f021);                        // xor eax, 33 ; h0+1
        put3(0x8a1c06);                        // mov bl, [esi+eax] ; ht[h0+1]
        put2(0x89c2);                          // mov edx, eax ; h0+1
        put3(0x83f220);                        // xor edx, 32  ; h2+1
        put3(0x3a1c16);                        // cmp bl, [esi+edx]
        put2(0x7708);                          // ja L4 ; test h1 vs h2
        put3(0x83f230);                        // xor edx, 48  ; h1+1
        put3(0x3a1c16);                        // cmp bl, [esi+edx]
        put2(0x7611);                          // jbe L7 ; replace h0
          // L4: ; h0 is not lowest, so replace h1 or h2
        put3(0x83f010);                        // xor eax, 16 ; h1+1
        put3(0x8a1c06);                        // mov bl, [esi+eax]
        put3(0x83f030);                        // xor eax, 48 ; h2+1
        put3(0x3a1c06);                        // cmp bl, [esi+eax]
        put2(0x7303);                          // jae L7
        put3(0x83f030);                        // xor eax, 48 ; h1+1
          // L7: ; replace row pointed to by eax = h0,h1,h2
        put3(0x83f001);                        // xor eax, 1
        put3(0x890c06);                        // mov [esi+eax], ecx ; chk
        put2(0x31c9);                          // xor ecx, ecx
        put4(0x894c0604);                      // mov [esi+eax+4], ecx
        put4(0x894c0608);                      // mov [esi+eax+8], ecx
        put4(0x894c060c);                      // mov [esi+eax+12], ecx
          // L3: ; save nibble context (in eax) in c
        put2a(0x8987, offc(c));                // mov [edi+c], eax
        put2(0xeb06);                          // jmp L8
          // L2: ; get nibble context
        put2a(0x8b87, offc(c));                // mov eax, [edi+c]
          // L8: ; nibble context is in eax
        put2a(0x8b97, off(hmap4));             // mov edx, [edi+&hmap4]
        put3(0x83e20f);                        // and edx, 15  ; hmap4
        put2(0x01d0);                          // add eax, edx ; c+(hmap4&15)
        put4(0x0fb61406);                      // movzx edx, byte [esi+eax]
        put2a(0x8997, offc(cxt));              // mov [edi+&cxt], edx ; cxt=bh
        if (S==8) put1(0x48);                  // rex.w
        put2a(0x8bb7, offc(cm));               // mov esi, [edi+&cm] ; cm
        // esi points to cm[256] (ICM) or cm[512] (ISSE) with 23 bit
        // prediction (ICM) or a pair of 20 bit signed weights (ISSE).
        // cxt = bit history bh (0..255) is in edx.
        if (cp[0]==ICM) {
          put3(0x8b0496);                      // mov eax, [esi+edx*4];cm[bh]
          put3(0xc1e808);                      // shr eax, 8
          put4a(0x0fbf8447, off(stretcht));    // movsx eax,word[edi+eax*2+..]
        }
        else {  // ISSE
          put2a(0x8b87, off(p[cp[2]]));        // mov eax, [edi+&p[j]]
          put4(0x0faf04d6);                    // imul eax, [esi+edx*8] ;wt[0]
          put4(0x8b4cd604);                    // mov ecx, [esi+edx*8+4];wt[1]
          put3(0xc1e106);                      // shl ecx, 6
          put2(0x01c8);                        // add eax, ecx
          put3(0xc1f810);                      // sar eax, 16
          put1a(0xb9, 2047);                   // mov ecx, 2047
          put2(0x39c8);                        // cmp eax, ecx
          put3(0x0f4fc1);                      // cmovg eax, ecx
          put1a(0xb9, -2048);                  // mov ecx, -2048
          put2(0x39c8);                        // cmp eax, ecx
          put3(0x0f4cc1);                      // cmovl eax, ecx
        }
        put2a(0x8987, off(p[i]));              // mov [edi+&p[i]], eax
        break;
      case MATCH: // sizebits bufbits: a=len, b=offset, c=bit, cxt=bitpos,
                  //                   ht=buf, limit=pos
        // assert(cr.cm.size()==(size_t(1)<<cp[1]));
        // assert(cr.ht.size()==(size_t(1)<<cp[2]));
        // assert(cr.a<=255);
        // assert(cr.c==0 || cr.c==1);
        // assert(cr.cxt<8);
        // assert(cr.limit<cr.ht.size());
        // if (cr.a==0) p[i]=0;
        // else {
        //   cr.c=(cr.ht(cr.limit-cr.b)>>(7-cr.cxt))&1; // predicted bit
        //   p[i]=stretch(dt2k[cr.a]*(cr.c*-2+1)&32767);
        // }
        if (S==8) put1(0x48);          // rex.w
        put2a(0x8bb7, offc(ht));       // mov esi, [edi+&ht]
        // If match length (a) is 0 then p[i]=0
        put2a(0x8b87, offc(a));        // mov eax, [edi+&a]
        put2(0x85c0);                  // test eax, eax
        put2(0x7449);                  // jz L2 ; p[i]=0
        // Else put predicted bit in c
        put1a(0xb9, 7);                // mov ecx, 7
        put2a(0x2b8f, offc(cxt));      // sub ecx, [edi+&cxt]
        put2a(0x8b87, offc(limit));    // mov eax, [edi+&limit]
        put2a(0x2b87, offc(b));        // sub eax, [edi+&b]
        put1a(0x25, (1<<cp[2])-1);     // and eax, ht.size()-1
        put4(0x0fb60406);              // movzx eax, byte [esi+eax]
        put2(0xd3e8);                  // shr eax, cl
        put3(0x83e001);                // and eax, 1  ; predicted bit
        put2a(0x8987, offc(c));        // mov [edi+&c], eax ; c
        // p[i]=stretch(dt2k[cr.a]*(cr.c*-2+1)&32767);
        put2a(0x8b87, offc(a));        // mov eax, [edi+&a]
        put3a(0x8b8487, off(dt2k));    // mov eax, [edi+eax*4+&dt2k] ; weight
        put2(0x7402);                  // jz L1 ; z if c==0
        put2(0xf7d8);                  // neg eax
        put1a(0x25, 0x7fff);           // L1: and eax, 32767
        put4a(0x0fbf8447, off(stretcht)); //movsx eax, word [edi+eax*2+...]
        put2a(0x8987, off(p[i]));      // L2: mov [edi+&p[i]], eax
        break;
      case AVG: // j k wt
        // p[i]=(p[cp[1]]*cp[3]+p[cp[2]]*(256-cp[3]))>>8;
        put2a(0x8b87, off(p[cp[1]]));  // mov eax, [edi+&p[j]]
        put2a(0x2b87, off(p[cp[2]]));  // sub eax, [edi+&p[k]]
        put2a(0x69c0, cp[3]);          // imul eax, wt
        put3(0xc1f808);                // sar eax, 8
        put2a(0x0387, off(p[cp[2]]));  // add eax, [edi+&p[k]]
        put2a(0x8987, off(p[i]));      // mov [edi+&p[i]], eax
        break;
      case MIX2:   // sizebits j k rate mask
                   // c=size cm=wt[size] cxt=input
        // cr.cxt=((h[i]+(c8&cp[5]))&(cr.c-1));
        // assert(cr.cxt<cr.a16.size());
        // int w=cr.a16[cr.cxt];
        // assert(w>=0 && w<65536);
        // p[i]=(w*p[cp[2]]+(65536-w)*p[cp[3]])>>16;
        // assert(p[i]>=-2048 && p[i]<2048);
        put2(0x8b07);                  // mov eax, [edi] ; c8
        put1a(0x25, cp[5]);            // and eax, mask
        put2a(0x0387, off(h[i]));      // add eax, [edi+&h[i]]
        put1a(0x25, (1<<cp[1])-1);     // and eax, size-1
        put2a(0x8987, offc(cxt));      // mov [edi+&cxt], eax ; cxt
        if (S==8) put1(0x48);          // rex.w
        put2a(0x8bb7, offc(a16));      // mov esi, [edi+&a16]
        put4(0x0fb70446);              // movzx eax, word [edi+eax*2] ; w
        put2a(0x8b8f, off(p[cp[2]]));  // mov ecx, [edi+&p[j]]
        put2a(0x8b97, off(p[cp[3]]));  // mov edx, [edi+&p[k]]
        put2(0x29d1);                  // sub ecx, edx
        put3(0x0fafc8);                // imul ecx, eax
        put3(0xc1e210);                // shl edx, 16
        put2(0x01d1);                  // add ecx, edx
        put3(0xc1f910);                // sar ecx, 16
        put2a(0x898f, off(p[i]));      // mov [edi+&p[i]]
        break;
      case MIX:    // sizebits j m rate mask
                   // c=size cm=wt[size][m] cxt=index of wt in cm
        // int m=cp[3];
        // assert(m>=1 && m<=i);
        // cr.cxt=h[i]+(c8&cp[5]);
        // cr.cxt=(cr.cxt&(cr.c-1))*m; // pointer to row of weights
        // assert(cr.cxt<=cr.cm.size()-m);
        // int* wt=(int*)&cr.cm[cr.cxt];
        // p[i]=0;
        // for (int j=0; j<m; ++j)
        //   p[i]+=(wt[j]>>8)*p[cp[2]+j];
        // p[i]=clamp2k(p[i]>>8);
        put2(0x8b07);                          // mov eax, [edi] ; c8
        put1a(0x25, cp[5]);                    // and eax, mask
        put2a(0x0387, off(h[i]));              // add eax, [edi+&h[i]]
        put1a(0x25, (1<<cp[1])-1);             // and eax, size-1
        put2a(0x69c0, cp[3]);                  // imul eax, m
        put2a(0x8987, offc(cxt));              // mov [edi+&cxt], eax ; cxt
        if (S==8) put1(0x48);                  // rex.w
        put2a(0x8bb7, offc(cm));               // mov esi, [edi+&cm]
        if (S==8) put1(0x48);                  // rex.w
        put3(0x8d3486);                        // lea esi, [esi+eax*4] ; wt
        // Unroll summation loop: esi=wt[0..m-1]
        for (int k=0; k<cp[3]; k+=8) {
          const int tail=cp[3]-k;  // number of elements remaining
          // pack 8 elements of wt in xmm1, 8 elements of p in xmm3
          put4a(0xf30f6f8e, k*4);              // movdqu xmm1, [esi+k*4]
          if (tail>3) put4a(0xf30f6f96, k*4+16);//movdqu xmm2, [esi+k*4+16]
          put5(0x660f72e1,0x08);               // psrad xmm1, 8
          if (tail>3) put5(0x660f72e2,0x08);   // psrad xmm2, 8
          put4(0x660f6bca);                    // packssdw xmm1, xmm2
          put4a(0xf30f6f9f, off(p[cp[2]+k]));  // movdqu xmm3, [edi+&p[j+k]]
          if (tail>3)
            put4a(0xf30f6fa7,off(p[cp[2]+k+4]));//movdqu xmm4, [edi+&p[j+k+4]]
          put4(0x660f6bdc);                    // packssdw, xmm3, xmm4
          if (tail>0 && tail<8) {  // last loop, mask extra weights
            put4(0x660f76ed);                  // pcmpeqd xmm5, xmm5 ; -1
            put5(0x660f73dd, 16-tail*2);       // psrldq xmm5, 16-tail*2
            put4(0x660fdbcd);                  // pand xmm1, xmm5
          }
          if (k==0) {  // first loop, initialize sum in xmm0
            put4(0xf30f6fc1);                  // movdqu xmm0, xmm1
            put4(0x660ff5c3);                  // pmaddwd xmm0, xmm3
          }
          else {  // accumulate sum in xmm0
            put4(0x660ff5cb);                  // pmaddwd xmm1, xmm3
            put4(0x660ffec1);                  // paddd xmm0, xmm1
          }
        }
        // Add up the 4 elements of xmm0 = p[i] in the first element
        put4(0xf30f6fc8);                      // movdqu xmm1, xmm0
        put5(0x660f73d9,0x08);                 // psrldq xmm1, 8
        put4(0x660ffec1);                      // paddd xmm0, xmm1
        put4(0xf30f6fc8);                      // movdqu xmm1, xmm0
        put5(0x660f73d9,0x04);                 // psrldq xmm1, 4
        put4(0x660ffec1);                      // paddd xmm0, xmm1
        put4(0x660f7ec0);                      // movd eax, xmm0 ; p[i]
        put3(0xc1f808);                        // sar eax, 8
        put1a(0x3d, 2047);                     // cmp eax, 2047
        put2(0x7e05);                          // jle L1
        put1a(0xb8, 2047);                     // mov eax, 2047
        put1a(0x3d, -2048);                    // L1: cmp eax, -2048
        put2(0x7d05);                          // jge, L2
        put1a(0xb8, -2048);                    // mov eax, -2048
        put2a(0x8987, off(p[i]));              // L2: mov [edi+&p[i]], eax
        break;
      case SSE:  // sizebits j start limit
        // cr.cxt=(h[i]+c8)*32;
        // int pq=p[cp[2]]+992;
        // if (pq<0) pq=0;
        // if (pq>1983) pq=1983;
        // int wt=pq&63;
        // pq>>=6;
        // assert(pq>=0 && pq<=30);
        // cr.cxt+=pq;
        // p[i]=stretch(((cr.cm(cr.cxt)>>10)*(64-wt)       // p0
        //               +(cr.cm(cr.cxt+1)>>10)*wt)>>13);  // p1
        // // p = p0*(64-wt)+p1*wt = (p1-p0)*wt + p0*64
        // cr.cxt+=wt>>5;
        put2a(0x8b8f, off(h[i]));      // mov ecx, [edi+&h[i]]
        put2(0x030f);                  // add ecx, [edi]  ; c0
        put2a(0x81e1, (1<<cp[1])-1);   // and ecx, size-1
        put3(0xc1e105);                // shl ecx, 5  ; cxt in 0..size*32-32
        put2a(0x8b87, off(p[cp[2]]));  // mov eax, [edi+&p[j]] ; pq
        put1a(0x05, 992);              // add eax, 992
        put2(0x31d2);                  // xor edx, edx ; 0
        put2(0x39d0);                  // cmp eax, edx
        put3(0x0f4cc2);                // cmovl eax, edx
        put1a(0xba, 1983);             // mov edx, 1983
        put2(0x39d0);                  // cmp eax, edx
        put3(0x0f4fc2);                // cmovg eax, edx ; pq in 0..1983
        put2(0x89c2);                  // mov edx, eax
        put3(0x83e23f);                // and edx, 63  ; wt in 0..63
        put3(0xc1e806);                // shr eax, 6   ; pq in 0..30
        put2(0x01c1);                  // add ecx, eax ; cxt in 0..size*32-2
        if (S==8) put1(0x48);          // rex.w
        put2a(0x8bb7, offc(cm));       // mov esi, [edi+cm]
        put3(0x8b048e);                // mov eax, [esi+ecx*4] ; cm[cxt]
        put4(0x8b5c8e04);              // mov ebx, [esi+ecx*4+4] ; cm[cxt+1]
        put3(0x83fa20);                // cmp edx, 32  ; wt
        put3(0x83d9ff);                // sbb ecx, -1  ; cxt+=wt>>5
        put2a(0x898f, offc(cxt));      // mov [edi+cxt], ecx  ; cxt saved
        put3(0xc1e80a);                // shr eax, 10 ; p0 = cm[cxt]>>10
        put3(0xc1eb0a);                // shr ebx, 10 ; p1 = cm[cxt+1]>>10
        put2(0x29c3);                  // sub ebx, eax, ; p1-p0
        put3(0x0fafda);                // imul ebx, edx ; (p1-p0)*wt
        put3(0xc1e006);                // shr eax, 6
        put2(0x01d8);                  // add eax, ebx ; p in 0..2^28-1
        put3(0xc1e80d);                // shr eax, 13  ; p in 0..32767
        put4a(0x0fbf8447, off(stretcht));  // movsx eax, word [edi+eax*2+...]
        put2a(0x8987, off(p[i]));      // mov [edi+&p[i]], eax
        break;
      default:
        error("invalid ZPAQ component");
    }
  }
  // return squash(p[n-1])
  put2a(0x8b87, off(p[n-1]));          // mov eax, [edi+...]
  put1a(0x05, 0x800);                  // add eax, 2048
  put4a(0x0fbf8447, off(squasht[0]));  // movsx eax, word [edi+eax*2+...]
  put1(0x5f);                          // pop edi
  put1(0x5e);                          // pop esi
  put1(0x5d);                          // pop ebp
  put1(0x5b);                          // pop ebx
  put1(0xc3);                          // ret
  // Initialize for update() Put predictor address in edi/rdi
  // and bit y=0..1 in ebp
  int save_o=o;
  o=5;
  put1a(0xe9, save_o-10);      // jmp update
  o=save_o;
  put1(0x53);                  // push ebx/rbx
  put1(0x55);                  // push ebp/rbp
  put1(0x56);                  // push esi/rsi
  put1(0x57);                  // push edi/rdi
  if (S==4) {
    put4(0x8b7c2414);          // mov edi,[esp+0x14] ; (1st arg = pr)
    put4(0x8b6c2418);          // mov ebp,[esp+0x18] ; (2nd arg = y)
  }
  else {
#if defined(unix) && !defined(__CYGWIN__)  // (1st arg already in rdi)
    put3(0x4889f5);            // mov rbp, rsi (2nd arg in Linux-64)
#else
    put3(0x4889cf);            // mov rdi, rcx (1st arg in Win64)
    put3(0x4889d5);            // mov rbp, rdx (2nd arg)
#endif
  }
  // Code update() for each component
  cp=hcomp+7;
  for (int i=0; i<n; ++i, cp+=compsize[cp[0]]) {
    assert(cp-hcomp<pr.z.cend);
    assert (cp[0]>=1 && cp[0]<=9);
    assert(compsize[cp[0]]>0 && compsize[cp[0]]<8);
    switch (cp[0]) {
      case CONS:  // c
        break;
      case SSE:  // sizebits j start limit
      case CM:   // sizebits limit
        // train(cr, y);
        //
        // reduce prediction error in cr.cm
        // void train(Component& cr, int y) {
        //   assert(y==0 || y==1);
        //   U32& pn=cr.cm(cr.cxt);
        //   U32 count=pn&0x3ff;
        //   int error=y*32767-(cr.cm(cr.cxt)>>17);
        //   pn+=(error*dt[count]&-1024)+(count<cr.limit);
        if (S==8) put1(0x48);          // rex.w (esi->rsi)
        put2a(0x8bb7, offc(cm));       // mov esi,[edi+cm]  ; cm
        put2a(0x8b87, offc(cxt));      // mov eax,[edi+cxt] ; cxt
        put1a(0x25, pr.comp[i].cm.size()-1);  // and eax, size-1
        if (S==8) put1(0x48);          // rex.w
        put3(0x8d3486);                // lea esi,[esi+eax*4] ; &cm[cxt]
        put2(0x8b06);                  // mov eax,[esi] ; cm[cxt]
        put2(0x89c2);                  // mov edx, eax  ; cm[cxt]
        put3(0xc1e811);                // shr eax, 17   ; cm[cxt]>>17
        put2(0x89e9);                  // mov ecx, ebp  ; y
        put3(0xc1e10f);                // shl ecx, 15   ; y*32768
        put2(0x29e9);                  // sub ecx, ebp  ; y*32767
        put2(0x29c1);                  // sub ecx, eax  ; error
        put2a(0x81e2, 0x3ff);          // and edx, 1023 ; count
        put3a(0x8b8497, off(dt));      // mov eax,[edi+edx*4+dt] ; dt[count]
        put3(0x0fafc8);                // imul ecx, eax ; error*dt[count]
        put2a(0x81e1, 0xfffffc00);     // and ecx, -1024
        put2a(0x81fa, cp[2+2*(cp[0]==SSE)]*4); // cmp edx, limit*4
        put2(0x110e);                  // adc [esi], ecx ; pn+=...
        break;
      case ICM:   // sizebits: cxt=bh, ht[c][0..15]=bh row
        // cr.ht[cr.c+(hmap4&15)]=st.next(cr.ht[cr.c+(hmap4&15)], y);
        // U32& pn=cr.cm(cr.cxt);
        // pn+=int(y*32767-(pn>>8))>>2;
      case ISSE:  // sizebits j  -- c=hi, cxt=bh
        // assert(cr.cxt==cr.ht[cr.c+(hmap4&15)]);
        // int err=y*32767-squash(p[i]);
        // int *wt=(int*)&cr.cm[cr.cxt*2];
        // wt[0]=clamp512k(wt[0]+((err*p[cp[2]]+(1<<12))>>13));
        // wt[1]=clamp512k(wt[1]+((err+16)>>5));
        // cr.ht[cr.c+(hmap4&15)]=st.next(cr.cxt, y);
        // update bit history bh to next(bh,y=ebp) in ht[c+(hmap4&15)]
        put3(0x8b4700+off(hmap4));     // mov eax, [edi+&hmap4]
        put3(0x83e00f);                // and eax, 15
        put2a(0x0387, offc(c));        // add eax [edi+&c] ; cxt
        if (S==8) put1(0x48);          // rex.w
        put2a(0x8bb7, offc(ht));       // mov esi, [edi+&ht]
        put4(0x0fb61406);              // movzx edx, byte [esi+eax] ; bh
        put4(0x8d5c9500);              // lea ebx, [ebp+edx*4] ; index to st
        put4a(0x0fb69c1f, off(st));    // movzx ebx,byte[edi+ebx+st]; next bh
        put3(0x881c06);                // mov [esi+eax], bl ; save next bh
        if (S==8) put1(0x48);          // rex.w
        put2a(0x8bb7, offc(cm));       // mov esi, [edi+&cm]
        // ICM: update cm[cxt=edx=bit history] to reduce prediction error
        // esi = &cm
        if (cp[0]==ICM) {
          if (S==8) put1(0x48);        // rex.w
          put3(0x8d3496);              // lea esi, [esi+edx*4] ; &cm[bh]
          put2(0x8b06);                // mov eax, [esi] ; pn
          put3(0xc1e808);              // shr eax, 8 ; pn>>8
          put2(0x89e9);                // mov ecx, ebp ; y
          put3(0xc1e10f);              // shl ecx, 15
          put2(0x29e9);                // sub ecx, ebp ; y*32767
          put2(0x29c1);                // sub ecx, eax
          put3(0xc1f902);              // sar ecx, 2
          put2(0x010e);                // add [esi], ecx
        }
        // ISSE: update weights. edx=cxt=bit history (0..255), esi=cm[512]
        else {
          put2a(0x8b87, off(p[i]));    // mov eax, [edi+&p[i]]
          put1a(0x05, 2048);           // add eax, 2048
          put4a(0x0fb78447, off(squasht)); // movzx eax, word [edi+eax*2+..]
          put2(0x89e9);                // mov ecx, ebp ; y
          put3(0xc1e10f);              // shl ecx, 15
          put2(0x29e9);                // sub ecx, ebp ; y*32767
          put2(0x29c1);                // sub ecx, eax ; err
          put2a(0x8b87, off(p[cp[2]]));// mov eax, [edi+&p[j]]
          put3(0x0fafc1);              // imul eax, ecx
          put1a(0x05, (1<<12));        // add eax, 4096
          put3(0xc1f80d);              // sar eax, 13
          put3(0x0304d6);              // add eax, [esi+edx*8] ; wt[0]
          put1a(0x3d, (1<<19)-1);      // cmp eax, (1<<19)-1
          put2(0x7e05);                // jle L1
          put1a(0xb8, (1<<19)-1);      // mov eax, (1<<19)-1
          put1a(0x3d, 0xfff80000);     // cmp eax, -1<<19
          put2(0x7d05);                // jge L2
          put1a(0xb8, 0xfff80000);     // mov eax, -1<<19
          put3(0x8904d6);              // L2: mov [esi+edx*8], eax
          put3(0x83c110);              // add ecx, 16 ; err
          put3(0xc1f905);              // sar ecx, 5
          put4(0x034cd604);            // add ecx, [esi+edx*8+4] ; wt[1]
          put2a(0x81f9, (1<<19)-1);    // cmp ecx, (1<<19)-1
          put2(0x7e05);                // jle L3
          put1a(0xb9, (1<<19)-1);      // mov ecx, (1<<19)-1
          put2a(0x81f9, 0xfff80000);   // cmp ecx, -1<<19
          put2(0x7d05);                // jge L4
          put1a(0xb9, 0xfff80000);     // mov ecx, -1<<19
          put4(0x894cd604);            // L4: mov [esi+edx*8+4], ecx
        }
        break;
      case MATCH: // sizebits bufbits:
                  //   a=len, b=offset, c=bit, cm=index, cxt=bitpos
                  //   ht=buf, limit=pos
        // assert(cr.a<=255);
        // assert(cr.c==0 || cr.c==1);
        // assert(cr.cxt<8);
        // assert(cr.cm.size()==(size_t(1)<<cp[1]));
        // assert(cr.ht.size()==(size_t(1)<<cp[2]));
        // if (int(cr.c)!=y) cr.a=0;  // mismatch?
        // cr.ht(cr.limit)+=cr.ht(cr.limit)+y;
        // if (++cr.cxt==8) {
        //   cr.cxt=0;
        //   ++cr.limit;
        //   cr.limit&=(1<<cp[2])-1;
        //   if (cr.a==0) {  // look for a match
        //     cr.b=cr.limit-cr.cm(h[i]);
        //     if (cr.b&(cr.ht.size()-1))
        //       while (cr.a<255
        //              && cr.ht(cr.limit-cr.a-1)==cr.ht(cr.limit-cr.a-cr.b-1))
        //         ++cr.a;
        //   }
        //   else cr.a+=cr.a<255;
        //   cr.cm(h[i])=cr.limit;
        // }
        // Set pointers ebx=&cm, esi=&ht
        if (S==8) put1(0x48);          // rex.w
        put2a(0x8bb7, offc(ht));       // mov esi, [edi+&ht]
        if (S==8) put1(0x48);          // rex.w
        put2a(0x8b9f, offc(cm));       // mov ebx, [edi+&cm]
        // if (c!=y) a=0;
        put2a(0x8b87, offc(c));        // mov eax, [edi+&c]
        put2(0x39e8);                  // cmp eax, ebp ; y
        put2(0x7408);                  // jz L1
        put2(0x31c0);                  // xor eax, eax
        put2a(0x8987, offc(a));        // mov [edi+&a], eax
        // ht(limit)+=ht(limit)+y  (1E)
        put2a(0x8b87, offc(limit));    // mov eax, [edi+&limit]
        put4(0x0fb60c06);              // movzx, ecx, byte [esi+eax]
        put2(0x01c9);                  // add ecx, ecx
        put2(0x01e9);                  // add ecx, ebp
        put3(0x880c06);                // mov [esi+eax], cl
        // if (++cxt==8)
        put2a(0x8b87, offc(cxt));      // mov eax, [edi+&cxt]
        put2(0xffc0);                  // inc eax
        put3(0x83e007);                // and eax,byte +0x7
        put2a(0x8987, offc(cxt));      // mov [edi+&cxt],eax
        put2a(0x0f85, 0x9b);           // jnz L8
        // ++limit;
        // limit&=bufsize-1;
        put2a(0x8b87, offc(limit));    // mov eax,[edi+&limit]
        put2(0xffc0);                  // inc eax
        put1a(0x25, (1<<cp[2])-1);     // and eax, bufsize-1
        put2a(0x8987, offc(limit));    // mov [edi+&limit],eax
        // if (a==0)
        put2a(0x8b87, offc(a));        // mov eax, [edi+&a]
        put2(0x85c0);                  // test eax,eax
        put2(0x755c);                  // jnz L6
        //   b=limit-cm(h[i])
        put2a(0x8b8f, off(h[i]));      // mov ecx,[edi+h[i]]
        put2a(0x81e1, (1<<cp[1])-1);   // and ecx, size-1
        put2a(0x8b87, offc(limit));    // mov eax,[edi-&limit]
        put3(0x2b048b);                // sub eax,[ebx+ecx*4]
        put2a(0x8987, offc(b));        // mov [edi+&b],eax
        //   if (b&(bufsize-1))
        put1a(0xa9, (1<<cp[2])-1);     // test eax, bufsize-1
        put2(0x7448);                  // jz L7
        //      while (a<255 && ht(limit-a-1)==ht(limit-a-b-1)) ++a;
        put1(0x53);                    // push ebx
        put2a(0x8b9f, offc(limit));    // mov ebx,[edi+&limit]
        put2(0x89da);                  // mov edx,ebx
        put2(0x29c3);                  // sub ebx,eax  ; limit-b
        put2(0x31c9);                  // xor ecx,ecx  ; a=0
        put2a(0x81f9, 0xff);           // L2: cmp ecx,0xff ; while
        put2(0x741c);                  // jz L3 ; break
        put2(0xffca);                  // dec edx
        put2(0xffcb);                  // dec ebx
        put2a(0x81e2, (1<<cp[2])-1);   // and edx, bufsize-1
        put2a(0x81e3, (1<<cp[2])-1);   // and ebx, bufsize-1
        put3(0x8a0416);                // mov al,[esi+edx]
        put3(0x3a041e);                // cmp al,[esi+ebx]
        put2(0x7504);                  // jnz L3 ; break
        put2(0xffc1);                  // inc ecx
        put2(0xebdc);                  // jmp short L2 ; end while
        put1(0x5b);                    // L3: pop ebx
        put2a(0x898f, offc(a));        // mov [edi+&a],ecx
        put2(0xeb0e);                  // jmp short L7
        // a+=(a<255)
        put1a(0x3d, 0xff);             // L6: cmp eax, 0xff ; a
        put3(0x83d000);                // adc eax, 0
        put2a(0x8987, offc(a));        // mov [edi+&a],eax
        // cm(h[i])=limit
        put2a(0x8b87, off(h[i]));      // L7: mov eax,[edi+&h[i]]
        put1a(0x25, (1<<cp[1])-1);     // and eax, size-1
        put2a(0x8b8f, offc(limit));    // mov ecx,[edi+&limit]
        put3(0x890c83);                // mov [ebx+eax*4],ecx
                                       // L8:
        break;
      case AVG:  // j k wt
        break;
      case MIX2: // sizebits j k rate mask
                 // cm=wt[size], cxt=input
        // assert(cr.a16.size()==cr.c);
        // assert(cr.cxt<cr.a16.size());
        // int err=(y*32767-squash(p[i]))*cp[4]>>5;
        // int w=cr.a16[cr.cxt];
        // w+=(err*(p[cp[2]]-p[cp[3]])+(1<<12))>>13;
        // if (w<0) w=0;
        // if (w>65535) w=65535;
        // cr.a16[cr.cxt]=w;
        // set ecx=err
        put2a(0x8b87, off(p[i]));      // mov eax, [edi+&p[i]]
        put1a(0x05, 2048);             // add eax, 2048
        put4a(0x0fb78447, off(squasht));//movzx eax, word [edi+eax*2+&squasht]
        put2(0x89e9);                  // mov ecx, ebp ; y
        put3(0xc1e10f);                // shl ecx, 15
        put2(0x29e9);                  // sub ecx, ebp ; y*32767
        put2(0x29c1);                  // sub ecx, eax
        put2a(0x69c9, cp[4]);          // imul ecx, rate
        put3(0xc1f905);                // sar ecx, 5  ; err
        // Update w
        put2a(0x8b87, offc(cxt));      // mov eax, [edi+&cxt]
        if (S==8) put1(0x48);          // rex.w
        put2a(0x8bb7, offc(a16));      // mov esi, [edi+&a16]
        if (S==8) put1(0x48);          // rex.w
        put3(0x8d3446);                // lea esi, [esi+eax*2] ; &w
        put2a(0x8b87, off(p[cp[2]]));  // mov eax, [edi+&p[j]]
        put2a(0x2b87, off(p[cp[3]]));  // sub eax, [edi+&p[k]] ; p[j]-p[k]
        put3(0x0fafc1);                // imul eax, ecx  ; * err
        put1a(0x05, 1<<12);            // add eax, 4096
        put3(0xc1f80d);                // sar eax, 13
        put3(0x0fb716);                // movzx edx, word [esi] ; w
        put2(0x01d0);                  // add eax, edx
        put1a(0xba, 0xffff);           // mov edx, 65535
        put2(0x39d0);                  // cmp eax, edx
        put3(0x0f4fc2);                // cmovg eax, edx
        put2(0x31d2);                  // xor edx, edx
        put2(0x39d0);                  // cmp eax, edx
        put3(0x0f4cc2);                // cmovl eax, edx
        put3(0x668906);                // mov word [esi], ax
        break;
      case MIX: // sizebits j m rate mask
                // cm=wt[size][m], cxt=input
        // int m=cp[3];
        // assert(m>0 && m<=i);
        // assert(cr.cm.size()==m*cr.c);
        // assert(cr.cxt+m<=cr.cm.size());
        // int err=(y*32767-squash(p[i]))*cp[4]>>4;
        // int* wt=(int*)&cr.cm[cr.cxt];
        // for (int j=0; j<m; ++j)
        //   wt[j]=clamp512k(wt[j]+((err*p[cp[2]+j]+(1<<12))>>13));
        // set ecx=err
        put2a(0x8b87, off(p[i]));      // mov eax, [edi+&p[i]]
        put1a(0x05, 2048);             // add eax, 2048
        put4a(0x0fb78447, off(squasht));//movzx eax, word [edi+eax*2+&squasht]
        put2(0x89e9);                  // mov ecx, ebp ; y
        put3(0xc1e10f);                // shl ecx, 15
        put2(0x29e9);                  // sub ecx, ebp ; y*32767
        put2(0x29c1);                  // sub ecx, eax
        put2a(0x69c9, cp[4]);          // imul ecx, rate
        put3(0xc1f904);                // sar ecx, 4  ; err
        // set esi=wt
        put2a(0x8b87, offc(cxt));      // mov eax, [edi+&cxt] ; cxt
        if (S==8) put1(0x48);          // rex.w
        put2a(0x8bb7, offc(cm));       // mov esi, [edi+&cm]
        if (S==8) put1(0x48);          // rex.w
        put3(0x8d3486);                // lea esi, [esi+eax*4] ; wt
        for (int k=0; k<cp[3]; ++k) {
          put2a(0x8b87,off(p[cp[2]+k]));//mov eax, [edi+&p[cp[2]+k]
          put3(0x0fafc1);              // imul eax, ecx
          put1a(0x05, 1<<12);          // add eax, 1<<12
          put3(0xc1f80d);              // sar eax, 13
          put2(0x0306);                // add eax, [esi]
          put1a(0x3d, (1<<19)-1);      // cmp eax, (1<<19)-1
          put2(0x7e05);                // jge L1
          put1a(0xb8, (1<<19)-1);      // mov eax, (1<<19)-1
          put1a(0x3d, 0xfff80000);     // cmp eax, -1<<19
          put2(0x7d05);                // jle L2
          put1a(0xb8, 0xfff80000);     // mov eax, -1<<19
          put2(0x8906);                // L2: mov [esi], eax
          if (k<cp[3]-1) {
            if (S==8) put1(0x48);      // rex.w
            put3(0x83c604);            // add esi, 4
          }
        }
        break;
      default:
        error("invalid ZPAQ component");
    }
  }
  // return from update()
  put1(0x5f);                 // pop edi
  put1(0x5e);                 // pop esi
  put1(0x5d);                 // pop ebp
  put1(0x5b);                 // pop ebx
  put1(0xc3);                 // ret
  return o;
}


// Return a prediction of the next bit in range 0..32767
// Use JIT code starting at pcode[0] if available, or else create it.
int Predictor::predict() {
	if (flagnojit)
		return predict0();
	
  if (!pcode) {
    allocx(pcode, pcode_size, (z.cend*100+4096)&-4096);
    int n=assemble_p();
    if (n>pcode_size) {
      allocx(pcode, pcode_size, n);
      n=assemble_p();
    }
    if (!pcode || n<15 || pcode_size<15)
      error("run JIT failed");
  }
  ///assert(pcode && pcode[0]);
  if (!(pcode && pcode[0]))
	  error("14367: pcode/pcode[0] kaputt\n");
  if (!pcode || !pcode[10]) 
    error("Invalid function pointer at pcode[10]");
 return ((int(*)(Predictor*))&pcode[10])(this);
}
// Update the model with bit y = 0..1
// Use the JIT code starting at pcode[5].
void Predictor::update(int y) {
	if (flagnojit)
	{
		update0(y);
		return;
	}
  assert(pcode && pcode[5]);
  ((void(*)(Predictor*, int))&pcode[5])(this, y);
  // Save bit y in c8, hmap4 (not implemented in JIT)
  c8+=c8+y;
  if (c8>=256) {
    z.run(c8-256);
    hmap4=1;
    c8=1;
    for (int i=0; i<z.header[6]; ++i) h[i]=z.H(i);
  }
  else if (c8>=16 && c8<32)
    hmap4=(hmap4&0xf)<<5|y<<4|1;
  else
    hmap4=(hmap4&0x1f0)|(((hmap4&0xf)*2+y)&0xf);
}
// Execute the ZPAQL code with input byte or -1 for EOF.
// When flagnojit is set the non-JIT run0() is used instead.
// Otherwise the JIT buffer rcode is assembled on first use, register a is
// loaded with the input and the code at rcode[0] is called. Its return
// value selects the error that is reported (0 means success).
void ZPAQL::run(U32 input) {
  if (flagnojit) {
    run0(input);
    return;
  }

  if (!rcode) {
    // Assemble into a buffer of estimated size first. assemble() reports a
    // length; when that exceeds the buffer, reallocate to that length and
    // assemble a second time.
    allocx(rcode, rcode_size, (hend*10+4096)&-4096);
    int needed=assemble();
    if (needed>rcode_size) {
      allocx(rcode, rcode_size, needed);
      needed=assemble();
    }
    if (!rcode || needed<10 || rcode_size<10)
      error("run JIT failed");
  }
  a=input;

  // Refuse to jump into a missing or empty code buffer.
  if (!rcode || !rcode[0]) {
    libzpaq::error("14427 Invalid function pointer in rcode[0]");
    return;
  }

  typedef int (*run_fn)();
  const U32 rc=((run_fn)(&rcode[0]))();
  switch (rc) {
    case 0:
      return;
    case 1:
      libzpaq::error("Bad ZPAQL opcode");
      break;
    case 2:
      libzpaq::error("Out of memory");
      break;
    case 3:
      libzpaq::error("Write error");
      break;
    default:
      libzpaq::error("ZPAQL execution error");
      break;
  }
}

/// LICENSE_START.6
////////////////////////// divsufsort ///////////////////////////////
/*
 * divsufsort.c for libdivsufsort-lite
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 Just a bit stripped
 */
/* Inline specifier used by the small divsufsort helpers below. */
#define INLINE_divsuf __inline
/* ALPHABET_SIZE: number of distinct symbols. A caller-supplied value
   below 1 is discarded; the default is 256. */
#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
# undef ALPHABET_SIZE
#endif
#if !defined(ALPHABET_SIZE)
# define ALPHABET_SIZE (256)
#endif
/* Bucket array sizes: one entry per symbol, and one per symbol pair. */
#define BUCKET_A_SIZE (ALPHABET_SIZE)
#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
/* SS_INSERTIONSORT_THRESHOLD: ranges of at most this many elements are
   handed to ss_insertionsort. Clamped to at least 1; default 8. */
#if defined(SS_INSERTIONSORT_THRESHOLD)
# if SS_INSERTIONSORT_THRESHOLD < 1
#  undef SS_INSERTIONSORT_THRESHOLD
#  define SS_INSERTIONSORT_THRESHOLD (1)
# endif
#else
# define SS_INSERTIONSORT_THRESHOLD (8)
#endif
/* SS_BLOCKSIZE: clamped to the range 0..32767; default 1024.
   Several helpers below are compiled only for particular values. */
#if defined(SS_BLOCKSIZE)
# if SS_BLOCKSIZE < 0
#  undef SS_BLOCKSIZE
#  define SS_BLOCKSIZE (0)
# elif 32768 <= SS_BLOCKSIZE
#  undef SS_BLOCKSIZE
#  define SS_BLOCKSIZE (32767)
# endif
#else
# define SS_BLOCKSIZE (1024)
#endif
/* Capacity of the explicit stack in ss_mintrosort, chosen from
   SS_BLOCKSIZE. */
#if SS_BLOCKSIZE == 0
# define SS_MISORT_STACKSIZE (96)
#elif SS_BLOCKSIZE <= 4096
# define SS_MISORT_STACKSIZE (16)
#else
# define SS_MISORT_STACKSIZE (24)
#endif
/* Fixed sizes/thresholds for code outside this excerpt. */
#define SS_SMERGE_STACKSIZE (32)
#define TR_INSERTIONSORT_THRESHOLD (8)
#define TR_STACKSIZE (64)
/* SWAP exchanges two lvalues through a variable named t, which must be
   declared with a compatible type in the enclosing scope. */
#ifndef SWAP
# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
#endif
/* MIN/MAX evaluate the selected argument twice: do not pass expressions
   with side effects. */
#ifndef MIN
# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
#endif
#ifndef MAX
# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
#endif
/* Explicit-stack helpers. They expect the enclosing function to provide
   an array `stack` of structs with members a, b, c, d (and e for the
   5-field variants), an int `ssize` holding the current depth, and a
   macro STACK_SIZE giving the capacity. */
/* Push a 4-field frame; capacity is only checked by assert(). */
#define STACK_PUSH(_a, _b, _c, _d)\
  do {\
    assert(ssize < STACK_SIZE);\
    stack[ssize].a = (_a), stack[ssize].b = (_b),\
    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
  } while(0)
/* Push a 5-field frame; capacity is only checked by assert(). */
#define STACK_PUSH5(_a, _b, _c, _d, _e)\
  do {\
    assert(ssize < STACK_SIZE);\
    stack[ssize].a = (_a), stack[ssize].b = (_b),\
    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
  } while(0)
/* Pop a 4-field frame. When the stack is empty this executes a bare
   `return;` in the ENCLOSING function, which is how the sort loops end. */
#define STACK_POP(_a, _b, _c, _d)\
  do {\
    assert(0 <= ssize);\
    if(ssize == 0) { return; }\
    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
  } while(0)
/* Pop a 5-field frame; same empty-stack behaviour as STACK_POP. */
#define STACK_POP5(_a, _b, _c, _d, _e)\
  do {\
    assert(0 <= ssize);\
    if(ssize == 0) { return; }\
    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
  } while(0)
/* Bucket accessors. They expect arrays named bucket_A and bucket_B in the
   enclosing scope. BUCKET_B and BUCKET_BSTAR share bucket_B but index it
   with the two symbols in opposite order: BUCKET_B uses c1 as the major
   index, BUCKET_BSTAR uses c0. The 256-symbol case uses shift/or instead
   of multiply/add. */
#define BUCKET_A(_c0) bucket_A[(_c0)]
#if ALPHABET_SIZE == 256
#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
#else
#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
#endif
/* lg_table[i] is floor(log2(i)) for i = 1..255, i.e. the position of the
   highest set bit of a byte; lg_table[0] is -1. The integer log and
   square-root helpers below combine it with shifts to handle wider
   values one byte at a time. */
static const int lg_table[256]= {
 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
/* Integer base-2 logarithm of n via lg_table, one byte at a time.
   The width of n that is examined depends on SS_BLOCKSIZE:
   32 bits when it is 0, 8 bits when it is below 256, else 16 bits. */
static INLINE_divsuf
int
ss_ilg(int n) {
#if SS_BLOCKSIZE == 0
  if(n & 0xffff0000) {
    if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
    return 16 + lg_table[(n >> 16) & 0xff];
  }
  if(n & 0x0000ff00) { return 8 + lg_table[(n >> 8) & 0xff]; }
  return lg_table[n & 0xff];
#elif SS_BLOCKSIZE < 256
  return lg_table[n];
#else
  if(n & 0xff00) { return 8 + lg_table[(n >> 8) & 0xff]; }
  return lg_table[n & 0xff];
#endif
}
#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
#if SS_BLOCKSIZE != 0
/* Square-root seed table used by ss_isqrt. The entries appear to be
   floor(sqrt(256 * i)) for i = 0..255 (e.g. [1]=16, [16]=64, [64]=128,
   [255]=255). */
static const int sqq_table[256] = {
  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
};
/* Integer square root of x, saturated at SS_BLOCKSIZE.
   The highest set bit of x is located with lg_table; a first estimate
   comes from sqq_table and, for x of 16 bits or more, is refined with
   one or two Newton-style steps. A final check lowers the estimate by
   one when its square exceeds x. */
static INLINE_divsuf
int
ss_isqrt(int x) {
  int root, msb;
  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }

  /* msb = index of the highest set bit of x (-1 when x is 0). */
  if(x & 0xffff0000) {
    if(x & 0xff000000) { msb = 24 + lg_table[(x >> 24) & 0xff]; }
    else               { msb = 16 + lg_table[(x >> 16) & 0xff]; }
  } else {
    if(x & 0x0000ff00) { msb = 8 + lg_table[(x >> 8) & 0xff]; }
    else               { msb = lg_table[x & 0xff]; }
  }

  /* Values below 256 are answered directly from the table. */
  if(msb < 8) { return sqq_table[x] >> 4; }

  if(msb < 16) {
    root = (sqq_table[x >> ((msb - 6) - (msb & 1))] >> (7 - (msb >> 1))) + 1;
  } else {
    root = sqq_table[x >> ((msb - 6) - (msb & 1))] << ((msb >> 1) - 7);
    if(msb >= 24) { root = (root + 1 + x / root) >> 1; }
    root = (root + 1 + x / root) >> 1;
  }

  if(x < (root * root)) { return root - 1; }
  return root;
}
#endif /* SS_BLOCKSIZE != 0 */
/* Compares two substrings of T byte by byte.
   The first starts at T + depth + *p1 and ends before T + *(p1 + 1) + 2;
   the second is defined the same way from p2.
   Returns a negative, zero or positive value: the difference of the
   first mismatching bytes, or -1 / 1 when one substring is exhausted
   before the other, or 0 when both end together. */
static INLINE_divsuf
int
ss_compare(const unsigned char *T,
           const int *p1, const int *p2,
           int depth) {
  const unsigned char *s1 = T + depth + *p1;
  const unsigned char *s2 = T + depth + *p2;
  const unsigned char *const end1 = T + *(p1 + 1) + 2;
  const unsigned char *const end2 = T + *(p2 + 1) + 2;

  while((s1 < end1) && (s2 < end2) && (*s1 == *s2)) {
    ++s1;
    ++s2;
  }

  if(s1 < end1) {
    return (s2 < end2) ? *s1 - *s2 : 1;
  }
  return (s2 < end2) ? -1 : 0;
}
#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
/* Insertion sort of [first, last) using ss_compare at the given depth.
   Elements are inserted from right to left. An element that compares
   equal to the one it stops in front of causes that neighbour to be
   stored bitwise-complemented (negative); while shifting, such negative
   entries are moved together with the entry preceding them. */
static
void
ss_insertionsort(const unsigned char *T, const int *PA,
                 int *first, int *last, int depth) {
  int *cur, *slot;
  int moving;
  int cmp;
  for(cur = last - 2; first <= cur; --cur) {
    moving = *cur;
    slot = cur + 1;
    while((cmp = ss_compare(T, PA + moving, PA + *slot, depth)) > 0) {
      /* Shift one entry, plus any complemented entries that follow it. */
      do {
        *(slot - 1) = *slot;
        ++slot;
      } while((slot < last) && (*slot < 0));
      if(last <= slot) { break; }
    }
    if(cmp == 0) { *slot = ~*slot; }
    *(slot - 1) = moving;
  }
}
#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
/* Sift-down step of a max-heap stored in SA[0..size), keyed by the byte
   Td[PA[SA[.]]]. The value at index i is moved down until neither child
   has a larger key. Note that the right child SA[2*i+2] is read without
   comparing its index against size. */
static INLINE_divsuf
void
ss_fixdown(const unsigned char *Td, const int *PA,
           int *SA, int i, int size) {
  const int held = SA[i];
  const int heldkey = Td[PA[held]];
  int left;
  while((left = 2 * i + 1) < size) {
    int pick = left;
    int pickkey = Td[PA[SA[left]]];
    const int rightkey = Td[PA[SA[left + 1]]];
    if(pickkey < rightkey) {
      pick = left + 1;
      pickkey = rightkey;
    }
    if(pickkey <= heldkey) { break; }
    SA[i] = SA[pick];
    i = pick;
  }
  SA[i] = held;
}
/* Top-down heapsort of SA[0..size) by the byte key Td[PA[SA[.]]].
   The heap is always built over an odd number of elements: for an even
   size the last element is set aside first (after ordering it against
   SA[m / 2]) and swapped in once the heap exists. */
static
void
ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
  const int even = ((size % 2) == 0);
  int heapsize = size;
  int idx;
  int held;

  if(even) {
    heapsize--;
    if(Td[PA[SA[heapsize / 2]]] < Td[PA[SA[heapsize]]]) {
      held = SA[heapsize];
      SA[heapsize] = SA[heapsize / 2];
      SA[heapsize / 2] = held;
    }
  }

  /* Build the heap bottom-up. */
  for(idx = heapsize / 2 - 1; idx >= 0; --idx) {
    ss_fixdown(Td, PA, SA, idx, heapsize);
  }

  if(even) {
    held = SA[0];
    SA[0] = SA[heapsize];
    SA[heapsize] = held;
    ss_fixdown(Td, PA, SA, 0, heapsize);
  }

  /* Repeatedly move the current maximum behind the shrinking heap. */
  for(idx = heapsize - 1; idx > 0; --idx) {
    held = SA[0];
    SA[0] = SA[idx];
    ss_fixdown(Td, PA, SA, 0, idx);
    SA[idx] = held;
  }
}
/* Returns whichever of v1, v2, v3 points at the element with the median
   key, where the key of a pointer v is the byte Td[PA[*v]]. */
static INLINE_divsuf
int *
ss_median3(const unsigned char *Td, const int *PA,
           int *v1, int *v2, int *v3) {
  int key1 = Td[PA[*v1]];
  int key2 = Td[PA[*v2]];
  const int key3 = Td[PA[*v3]];

  /* Order the first two so that key1 <= key2. */
  if(key1 > key2) {
    int *ptmp = v1;
    const int ktmp = key1;
    v1 = v2; key1 = key2;
    v2 = ptmp; key2 = ktmp;
  }

  if(key2 <= key3) { return v2; }
  return (key1 > key3) ? v1 : v3;
}
/* Returns whichever of v1..v5 points at the element with the median key,
   where the key of a pointer v is the byte Td[PA[*v]]. Works by a fixed
   sequence of compare-and-exchange steps on the pointers themselves. */
static INLINE_divsuf
int *
ss_median5(const unsigned char *Td, const int *PA,
           int *v1, int *v2, int *v3, int *v4, int *v5) {
  int *tmp;
  if(Td[PA[*v2]] > Td[PA[*v3]]) { tmp = v2; v2 = v3; v3 = tmp; }
  if(Td[PA[*v4]] > Td[PA[*v5]]) { tmp = v4; v4 = v5; v5 = tmp; }
  if(Td[PA[*v2]] > Td[PA[*v4]]) {
    tmp = v2; v2 = v4; v4 = tmp;
    tmp = v3; v3 = v5; v5 = tmp;
  }
  if(Td[PA[*v1]] > Td[PA[*v3]]) { tmp = v1; v1 = v3; v3 = tmp; }
  if(Td[PA[*v1]] > Td[PA[*v4]]) {
    tmp = v1; v1 = v4; v4 = tmp;
    tmp = v3; v3 = v5; v5 = tmp;
  }
  return (Td[PA[*v3]] > Td[PA[*v4]]) ? v4 : v3;
}
/* Chooses a pivot position inside [first, last).
   Up to 32 elements: median of first, middle and last element.
   Up to 512 elements: median of five evenly spread samples.
   Larger ranges: median of three medians-of-three taken near the start,
   around the middle and near the end. */
static INLINE_divsuf
int *
ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
  const int count = (int)(last - first);
  int *const mid = first + count / 2;
  int step;
  int *lo3, *mid3, *hi3;

  if(count <= 32) {
    return ss_median3(Td, PA, first, mid, last - 1);
  }
  if(count <= 512) {
    step = count >> 2;
    return ss_median5(Td, PA, first, first + step, mid, last - 1 - step, last - 1);
  }

  step = count >> 3;
  lo3  = ss_median3(Td, PA, first, first + step, first + (step << 1));
  mid3 = ss_median3(Td, PA, mid - step, mid, mid + step);
  hi3  = ss_median3(Td, PA, last - 1 - (step << 1), last - 1 - step, last - 1);
  return ss_median3(Td, PA, lo3, mid3, hi3);
}
/* Binary partition of [first, last) on the predicate
   PA[v] + depth >= PA[v + 1] + 1 (v being an element of the range).
   Elements satisfying it are gathered at the front and stored
   bitwise-complemented; the others follow unchanged. When the front
   part is non-empty, *first is complemented once more, so it ends up
   as a plain index again. Returns the start of the second part. */
static INLINE_divsuf
int *
ss_partition(const int *PA,
                    int *first, int *last, int depth) {
  int *lo = first - 1;
  int *hi = last;
  int flipped;
  for(;;) {
    /* Advance over leading elements that satisfy the predicate. */
    while((++lo < hi) && ((PA[*lo] + depth) >= (PA[*lo + 1] + 1))) {
      *lo = ~*lo;
    }
    /* Retreat over trailing elements that do not. */
    while((lo < --hi) && ((PA[*hi] + depth) < (PA[*hi + 1] + 1))) {
    }
    if(hi <= lo) { break; }
    flipped = ~*hi;
    *hi = *lo;
    *lo = flipped;
  }
  if(first < lo) { *first = ~*first; }
  return lo;
}
/* Multikey introsort for medium size groups.
   Sorts the index range [first, last). At each level the sort key of an
   element *p is the single byte Td[PA[*p]] with Td = T + depth.
   Recursion is replaced by an explicit stack of (first, last, depth,
   limit) frames with capacity SS_MISORT_STACKSIZE; the function returns
   when STACK_POP finds the stack empty.
   `limit` starts as ss_ilg(range length) and is decremented once per
   partitioning round: when it reaches 0 the range is heap-sorted on the
   current byte, and a negative value marks a range that is handled by
   scanning runs of equal bytes instead of partitioning. */
static
void
ss_mintrosort(const unsigned char *T, const int *PA,
              int *first, int *last,
              int depth) {
#define STACK_SIZE SS_MISORT_STACKSIZE
  struct { int *a, *b, c; int d; } stack[STACK_SIZE];
  const unsigned char *Td;
  int *a, *b, *c, *d, *e, *f;
  int s, t;
  int ssize;
  int limit;
  int v, x = 0;
  for(ssize = 0, limit = ss_ilg(last - first);;) {
    /* Small range: finish it with insertion sort and take the next frame
       (STACK_POP returns from this function when none is left). */
    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
#if 1 < SS_INSERTIONSORT_THRESHOLD
      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
#endif
      STACK_POP(first, last, depth, limit);
      continue;
    }
    Td = T + depth;
    /* Partitioning budget used up: heapsort on the current byte. limit is
       -1 afterwards, so the run-scanning branch below is taken. */
    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
    if(limit < 0) {
      /* Find the first run of two or more equal bytes; single-element
         runs are skipped by moving `first` forward. */
      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
        if((x = Td[PA[*a]]) != v) {
          if(1 < (a - first)) { break; }
          v = x;
          first = a;
        }
      }
      /* Run is [first, a). It is passed through ss_partition first when
         the byte just before the current comparison position is < v. */
      if(Td[PA[*first] - 1] < v) {
        first = ss_partition(PA, first, a, depth);
      }
      /* Continue with the smaller of [first, a) (one byte deeper) and
         [a, last) (same depth, limit -1); push the other if non-trivial. */
      if((a - first) <= (last - a)) {
        if(1 < (a - first)) {
          STACK_PUSH(a, last, depth, -1);
          last = a, depth += 1, limit = ss_ilg(a - first);
        } else {
          first = a, limit = -1;
        }
      } else {
        if(1 < (last - a)) {
          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
          first = a, limit = -1;
        } else {
          last = a, depth += 1, limit = ss_ilg(a - first);
        }
      }
      continue;
    }
    /* choose pivot */
    a = ss_pivot(Td, PA, first, last);
    v = Td[PA[*a]];
    SWAP(*first, *a);
    /* partition */
    /* Three-way split around v: while scanning inward from both ends,
       elements equal to v are parked at the outer edges ([first, a) on
       the left, (d, last) on the right). */
    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
    if(((a = b) < last) && (x < v)) {
      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
        if(x == v) { SWAP(*b, *a); ++a; }
      }
    }
    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
    if((b < (d = c)) && (x > v)) {
      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
        if(x == v) { SWAP(*c, *d); --d; }
      }
    }
    for(; b < c;) {
      SWAP(*b, *c);
      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
        if(x == v) { SWAP(*b, *a); ++a; }
      }
      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
        if(x == v) { SWAP(*c, *d); --d; }
      }
    }
    if(a <= d) {
      /* Swap the parked equal elements from the edges into the middle;
         afterwards the layout is [first, a) < v, [a, c) == v,
         [c, last) > v. */
      c = b - 1;
      if((s = a - first) > (t = b - a)) { s = t; }
      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
      if((s = d - c) > (t = last - d - 1)) { s = t; }
      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
      a = first + (b - a), c = last - (d - c);
      /* b becomes the start of the part of the equal range that is sorted
         one byte deeper; ss_partition is applied to [a, c) unless the byte
         before the current comparison position is >= v. */
      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
      /* Three sub-ranges remain: [first, a) and [c, last) at this depth,
         [b, c) at depth + 1. The branches below compare their lengths to
         decide which one to continue with and in which order the other
         two are pushed. */
      if((a - first) <= (last - c)) {
        if((last - c) <= (c - b)) {
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          STACK_PUSH(c, last, depth, limit);
          last = a;
        } else if((a - first) <= (c - b)) {
          STACK_PUSH(c, last, depth, limit);
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          last = a;
        } else {
          STACK_PUSH(c, last, depth, limit);
          STACK_PUSH(first, a, depth, limit);
          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
        }
      } else {
        if((a - first) <= (c - b)) {
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          STACK_PUSH(first, a, depth, limit);
          first = c;
        } else if((last - c) <= (c - b)) {
          STACK_PUSH(first, a, depth, limit);
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          first = c;
        } else {
          STACK_PUSH(first, a, depth, limit);
          STACK_PUSH(c, last, depth, limit);
          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
        }
      }
    } else {
      /* a > d: presumably every byte in the range equalled the pivot, so
         no split happened. Give the budget unit back and retry the same
         range one byte deeper. */
      limit += 1;
      if(Td[PA[*first] - 1] < v) {
        first = ss_partition(PA, first, last, depth);
        limit = ss_ilg(last - first);
      }
      depth += 1;
    }
  }
#undef STACK_SIZE
}
#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
#if SS_BLOCKSIZE != 0
/* Exchanges the n ints starting at a with the n ints starting at b,
   element by element in ascending order. Does nothing when n <= 0. */
static INLINE_divsuf
void
ss_blockswap(int *a, int *b, int n) {
  int k;
  for(k = 0; k < n; ++k) {
    const int held = a[k];
    a[k] = b[k];
    b[k] = held;
  }
}
/* Rotates [first, last) in place around `middle`, so that the run
   [first, middle) and the run [middle, last) change places.
   l and r track the lengths of the left and right run that still have
   to be exchanged; the loop ends as soon as either one is empty. */
static INLINE_divsuf
void
ss_rotate(int *first, int *middle, int *last) {
  int *a, *b, t;
  int l, r;
  l = middle - first, r = last - middle;
  for(; (0 < l) && (0 < r);) {
    /* equal lengths: a plain element-wise exchange finishes the job */
    if(l == r) { ss_blockswap(first, middle, l); break; }
    if(l < r) {
      /* left run is the shorter one: walk backwards from the end of each
         run, keeping the displaced element in t */
      a = last - 1, b = middle - 1;
      t = *a;
      do {
        *a-- = *b, *b-- = *a;
        if(b < first) {
          /* one pass over the left run is complete: drop t into the hole,
             shrink the range from the right and update r */
          *a = t;
          last = a;
          if((r -= l + 1) <= l) { break; }
          a -= 1, b = middle - 1;
          t = *a;
        }
      } while(1);
    } else {
      /* right run is the shorter one: mirror image, walking forwards */
      a = first, b = middle;
      t = *a;
      do {
        *a++ = *b, *b++ = *a;
        if(last <= b) {
          /* one pass over the right run is complete: drop t into the hole,
             shrink the range from the left and update l */
          *a = t;
          first = a + 1;
          if((l -= r + 1) <= r) { break; }
          a += 1, b = middle;
          t = *a;
        }
      } while(1);
    }
  }
}
/* Merges the adjacent runs [first, middle) and [middle, last) without any
   auxiliary buffer.
   Entries are offsets into PA; an entry stored bit-complemented (negative)
   is decoded with ~ before it is used as an offset.
   Each round takes the last entry of the right run, binary-searches the left
   run for its insertion point with ss_compare, and rotates the tail of the
   left run behind it with ss_rotate. */
static
void
ss_inplacemerge(const unsigned char *T, const int *PA,
                int *first, int *middle, int *last,
                int depth) {
  const int *p;
  int *a, *b;
  int len, half;
  int q, r;
  int x;
  for(;;) {
    /* p = suffix referenced by the last entry; x records whether that entry
       was stored complemented */
    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
    else                { x = 0; p = PA +  *(last - 1); }
    /* binary search in [first, middle); r keeps the most recent
       non-negative comparison result (-1 if there was none) */
    for(a = first, len = middle - first, half = len >> 1, r = -1;
        0 < len;
        len = half, half >>= 1) {
      b = a + half;
      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
      if(q < 0) {
        a = b + 1;
        half -= (len & 1) ^ 1;
      } else {
        r = q;
      }
    }
    if(a < middle) {
      /* a comparison result of 0 is recorded by complementing the entry
         found at the insertion point */
      if(r == 0) { *a = ~*a; }
      ss_rotate(a, middle, last);
      last -= middle - a;
      middle = a;
      if(first == middle) { break; }
    }
    --last;
    /* if the processed entry was complemented, also step back over the
       complemented entries that precede it */
    if(x != 0) { while(*--last < 0) { } }
    if(middle == last) { break; }
  }
}
/* Merge-forward with internal buffer.
   The left run [first, middle) is first exchanged with buf (so buf must
   provide at least middle - first ints), then buf and the right run
   [middle, last) are merged front to back into [first, last).
   Every move is an exchange, so the previous contents of buf end up back in
   buf; t holds the one element displaced at the start.
   Entries are offsets into PA.  When ss_compare reports 0 the current entry
   of the right run is stored complemented; the inner do/while loops carry
   along the complemented (negative) entries that follow the one just moved. */
static
void
ss_mergeforward(const unsigned char *T, const int *PA,
                int *first, int *middle, int *last,
                int *buf, int depth) {
  int *a, *b, *c, *bufend;
  int t;
  int r;
  bufend = buf + (middle - first) - 1;
  ss_blockswap(buf, first, middle - first);
  /* a: output position, b: cursor in buf, c: cursor in the right run */
  for(t = *(a = first), b = buf, c = middle;;) {
    r = ss_compare(T, PA + *b, PA + *c, depth);
    if(r < 0) {
      /* buffered entry goes first */
      do {
        *a++ = *b;
        if(bufend <= b) { *bufend = t; return; }
        *b++ = *a;
      } while(*b < 0);
    } else if(r > 0) {
      /* right-run entry goes first */
      do {
        *a++ = *c, *c++ = *a;
        if(last <= c) {
          /* right run exhausted: move the rest of buf into place */
          while(b < bufend) { *a++ = *b, *b++ = *a; }
          *a = *b, *b = t;
          return;
        }
      } while(*c < 0);
    } else {
      /* comparison returned 0: complement the right-run entry, then emit the
         buffered entry followed by the right-run entry */
      *c = ~*c;
      do {
        *a++ = *b;
        if(bufend <= b) { *bufend = t; return; }
        *b++ = *a;
      } while(*b < 0);
      do {
        *a++ = *c, *c++ = *a;
        if(last <= c) {
          while(b < bufend) { *a++ = *b, *b++ = *a; }
          *a = *b, *b = t;
          return;
        }
      } while(*c < 0);
    }
  }
}
/* Merge-backward with internal buffer.
   The right run [middle, last) is first exchanged with buf (so buf must
   provide at least last - middle ints), then buf and the left run
   [first, middle) are merged back to front into [first, last).
   p1 / p2 point at the PA entries of the current buffer / left-run
   candidates.  Bit 0 of x is set while the buffer candidate was stored
   complemented, bit 1 while the left-run candidate was; the matching
   do/while loops first move the complemented entries belonging to that
   candidate. */
static
void
ss_mergebackward(const unsigned char *T, const int *PA,
                 int *first, int *middle, int *last,
                 int *buf, int depth) {
  const int *p1, *p2;
  int *a, *b, *c, *bufend;
  int t;
  int r;
  int x;
  bufend = buf + (last - middle) - 1;
  ss_blockswap(buf, middle, last - middle);
  x = 0;
  if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
  else                  { p1 = PA +  *bufend; }
  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
  else                  { p2 = PA +  *(middle - 1); }
  /* a: output position (moving down), b: cursor in buf, c: cursor in the
     left run; t holds the element displaced at the start */
  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
    r = ss_compare(T, p1, p2, depth);
    if(0 < r) {
      /* buffered entry is the larger one: emit it */
      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
      *a-- = *b;
      if(b <= buf) { *buf = t; break; }
      *b-- = *a;
      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
      else       { p1 = PA +  *b; }
    } else if(r < 0) {
      /* left-run entry is the larger one: emit it */
      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
      *a-- = *c, *c-- = *a;
      if(c < first) {
        /* left run exhausted: move the rest of buf into place */
        while(buf < b) { *a-- = *b, *b-- = *a; }
        *a = *b, *b = t;
        break;
      }
      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
      else       { p2 = PA +  *c; }
    } else {
      /* comparison returned 0: emit the buffered entry complemented, then
         the left-run entry, and reload both candidates */
      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
      *a-- = ~*b;
      if(b <= buf) { *buf = t; break; }
      *b-- = *a;
      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
      *a-- = *c, *c-- = *a;
      if(c < first) {
        while(buf < b) { *a-- = *b, *b-- = *a; }
        *a = *b, *b = t;
        break;
      }
      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
      else       { p1 = PA +  *b; }
      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
      else       { p2 = PA +  *c; }
    }
  }
}
/* D&C based merge.
   Merges the adjacent runs [first, middle) and [middle, last) using a
   scratch buffer of bufsize ints.  When one side fits into buf the work is
   handed to ss_mergebackward / ss_mergeforward.  Otherwise a binary search
   finds a count m, m entries on each side of `middle` are exchanged with
   ss_blockswap, and the two resulting sub-merges are handled iteratively via
   an explicit stack (one is pushed, the other is processed next).
   `check` / `next` are bit sets (1, 2, 4) consumed by MERGE_CHECK, which
   complements boundary entries after a sub-merge.
   NOTE(review): STACK_PUSH / STACK_POP are defined outside this block; the
   for(;;) loop has no other exit, so STACK_POP presumably returns from the
   function once the stack is empty -- confirm against the macro. */
static
void
ss_swapmerge(const unsigned char *T, const int *PA,
             int *first, int *middle, int *last,
             int *buf, int bufsize, int depth) {
#define STACK_SIZE SS_SMERGE_STACKSIZE
#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
#define MERGE_CHECK(a, b, c)\
  do {\
    if(((c) & 1) ||\
       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
      *(a) = ~*(a);\
    }\
    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
      *(b) = ~*(b);\
    }\
  } while(0)
  struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
  int *l, *r, *lm, *rm;
  int m, len, half;
  int ssize;
  int check, next;
  for(check = 0, ssize = 0;;) {
    /* right run fits into the buffer: merge backward */
    if((last - middle) <= bufsize) {
      if((first < middle) && (middle < last)) {
        ss_mergebackward(T, PA, first, middle, last, buf, depth);
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
      continue;
    }
    /* left run fits into the buffer: merge forward */
    if((middle - first) <= bufsize) {
      if(first < middle) {
        ss_mergeforward(T, PA, first, middle, last, buf, depth);
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
      continue;
    }
    /* binary search for m: compares the entry m + half to the right of
       middle with its mirror entry to the left of middle */
    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
        0 < len;
        len = half, half >>= 1) {
      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
        m += half + 1;
        half -= (len & 1) ^ 1;
      }
    }
    if(0 < m) {
      /* exchange the m entries on each side of middle, then split the work
         into [first, lm, l) and [r, rm, last) */
      lm = middle - m, rm = middle + m;
      ss_blockswap(lm, middle, m);
      l = r = middle, next = 0;
      if(rm < last) {
        if(*rm < 0) {
          *rm = ~*rm;
          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
          next |= 1;
        } else if(first < lm) {
          for(; *r < 0; ++r) { }
          next |= 2;
        }
      }
      /* continue with the smaller part, push the larger one */
      if((l - first) <= (last - r)) {
        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
        middle = lm, last = l, check = (check & 3) | (next & 4);
      } else {
        if((next & 2) && (r == middle)) { next ^= 6; }
        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
        first = r, middle = rm, check = (next & 3) | (check & 4);
      }
    } else {
      /* m == 0: nothing has to move; only record a comparison result of 0
         at the boundary */
      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
        *middle = ~*middle;
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
    }
  }
#undef STACK_SIZE
}
#endif /* SS_BLOCKSIZE != 0 */
/* Substring sort.
   Sorts the entries in [first, last); entries are offsets into PA and are
   compared through ss_compare / the ss_* sorters at the given depth.
     T           text being indexed
     PA          array the entries of [first, last) index into
     buf/bufsize scratch space for merging; when it is too small the tail of
                 the range itself is used as buffer and merged back with
                 ss_inplacemerge at the end
     n           length used to build the sentinel PAi[1] = n - 2 for the
                 final insertion
     lastsuffix  when non-zero, *first is left out of the sort and inserted
                 afterwards by a linear scan
   With SS_BLOCKSIZE == 0 the whole range goes to ss_mintrosort; otherwise it
   is sorted in blocks of SS_BLOCKSIZE entries that are merged pairwise with
   ss_swapmerge. */
static
void
sssort(const unsigned char *T, const int *PA,
       int *first, int *last,
       int *buf, int bufsize,
       int depth, int n, int lastsuffix) {
  int *a;
#if SS_BLOCKSIZE != 0
  int *b, *middle, *curbuf;
  int j, k, curbufsize, limit;
#endif
  int i;
  if(lastsuffix != 0) { ++first; }
#if SS_BLOCKSIZE == 0
  ss_mintrosort(T, PA, first, last, depth);
#else
  /* external buffer too small: reserve `limit` entries at the end of the
     range as merge buffer (limit != 0 marks this case) */
  if((bufsize < SS_BLOCKSIZE) &&
      (bufsize < (last - first)) &&
      (bufsize < (limit = ss_isqrt(last - first)))) {
    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
    buf = middle = last - limit, bufsize = limit;
  } else {
    middle = last, limit = 0;
  }
  /* sort each full block; the set bits of the block counter i decide how
     many already sorted neighbours are merged in right away */
  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
#elif 1 < SS_BLOCKSIZE
    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
#endif
    /* use the not yet sorted remainder of the range as buffer when it is
       larger than buf */
    curbufsize = last - (a + SS_BLOCKSIZE);
    curbuf = a + SS_BLOCKSIZE;
    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
    }
  }
  /* sort the last (possibly short) block */
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  ss_mintrosort(T, PA, a, middle, depth);
#elif 1 < SS_BLOCKSIZE
  ss_insertionsort(T, PA, a, middle, depth);
#endif
  /* merge the remaining sorted runs, smallest first */
  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
    if(i & 1) {
      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
      a -= k;
    }
  }
  if(limit != 0) {
    /* the tail served as buffer: sort it and merge it in place */
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
    ss_mintrosort(T, PA, middle, last, depth);
#elif 1 < SS_BLOCKSIZE
    ss_insertionsort(T, PA, middle, last, depth);
#endif
    ss_inplacemerge(T, PA, first, middle, last, depth);
  }
#endif
  if(lastsuffix != 0) {
    /* Insert last type B* suffix. */
    /* shift entries one slot to the left while they are complemented or
       compare below the held-out entry, then drop that entry (i) into the
       gap */
    int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
    for(a = first, i = *(first - 1);
        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
        ++a) {
      *(a - 1) = *a;
    }
    *(a - 1) = i;
  }
}
/* Looks up lg_table for the most significant non-zero byte of n and adds
   that byte's bit offset (24, 16, 8 or 0).
   NOTE(review): lg_table is defined elsewhere; presumably it holds the
   integer log2 of each byte value, making this floor(log2(n)) -- confirm. */
static INLINE_divsuf
int
tr_ilg(int n) {
  if(n & 0xffff0000) {
    /* something is set in the upper 16 bits */
    if(n & 0xff000000) {
      return 24 + lg_table[(n >> 24) & 0xff];
    }
    return 16 + lg_table[(n >> 16) & 0xff];
  }
  if(n & 0x0000ff00) {
    return 8 + lg_table[(n >>  8) & 0xff];
  }
  return 0 + lg_table[(n >>  0) & 0xff];
}
/* Insertion sort for small groups.
   Orders the entries of [first, last) by ISAd[entry].  Entries already in
   the sorted prefix that are stored complemented (negative) are moved
   together with the non-negative entry following them.  When the inserted
   key equals the key of the entry it lands behind, that entry is
   complemented. */
static
void
tr_insertionsort(const int *ISAd, int *first, int *last) {
  int *cur, *slot;
  int key, diff;
  for(cur = first + 1; cur < last; ++cur) {
    key = *cur;
    slot = cur - 1;
    for(;;) {
      diff = ISAd[key] - ISAd[*slot];
      if(diff >= 0) { break; }
      /* shift one entry plus any complemented entries in front of it */
      do {
        slot[1] = slot[0];
        --slot;
      } while((first <= slot) && (*slot < 0));
      if(slot < first) { break; }
    }
    if(diff == 0) { *slot = ~*slot; }
    slot[1] = key;
  }
}
/* Sift-down step of a max-heap stored in SA[0..size) and keyed by
   ISAd[SA[.]]: the entry at index i sinks until neither child has a
   larger key.  Both children (2i+1 and 2i+2) are always read, so callers
   must make sure index 2i+2 is readable whenever 2i+1 < size. */
static INLINE_divsuf
void
tr_fixdown(const int *ISAd, int *SA, int i, int size) {
  int left, right, pick;
  int best_key, right_key;
  int saved, saved_key;
  saved = SA[i];
  saved_key = ISAd[saved];
  while((left = 2 * i + 1) < size) {
    right = left + 1;
    pick = left;
    best_key = ISAd[SA[left]];
    right_key = ISAd[SA[right]];
    if(best_key < right_key) {
      pick = right;
      best_key = right_key;
    }
    if(best_key <= saved_key) { break; }
    /* pull the larger child up and continue below it */
    SA[i] = SA[pick];
    i = pick;
  }
  SA[i] = saved;
}
/* Top-down heapsort of SA[0..size) by ISAd[SA[.]].
   The heap is built over an odd number of entries m; for an even size the
   last entry is kept outside the heap and exchanged in once the heap has
   been built. */
static
void
tr_heapsort(const int *ISAd, int *SA, int size) {
  int i, m;
  int t;
  const int even = ((size % 2) == 0);
  m = even ? (size - 1) : size;
  if(even && (ISAd[SA[m / 2]] < ISAd[SA[m]])) {
    SWAP(SA[m], SA[m / 2]);
  }
  /* heapify */
  i = m / 2 - 1;
  while(0 <= i) {
    tr_fixdown(ISAd, SA, i, m);
    --i;
  }
  if(even) {
    SWAP(SA[0], SA[m]);
    tr_fixdown(ISAd, SA, 0, m);
  }
  /* repeatedly move the root behind the shrinking heap */
  i = m - 1;
  while(0 < i) {
    t = SA[0];
    SA[0] = SA[i];
    tr_fixdown(ISAd, SA, 0, i);
    SA[i] = t;
    --i;
  }
}
/* Returns whichever of v1, v2, v3 points at the entry whose key
   ISAd[*v] is the median of the three. */
static INLINE_divsuf
int *
tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
  int *t;
  if(ISAd[*v2] < ISAd[*v1]) { SWAP(v1, v2); }
  /* the local copies are now ordered: key(v1) <= key(v2) */
  if(ISAd[*v2] <= ISAd[*v3]) {
    return v2;
  }
  return (ISAd[*v1] > ISAd[*v3]) ? v1 : v3;
}
/* Returns whichever of v1..v5 points at the entry whose key ISAd[*v] is
   the median of the five.  Only the local pointer copies are exchanged; the
   array itself is not modified. */
static INLINE_divsuf
int *
tr_median5(const int *ISAd,
           int *v1, int *v2, int *v3, int *v4, int *v5) {
  int *t;
  /* order the pairs (v2, v3) and (v4, v5) */
  if(ISAd[*v3] < ISAd[*v2]) { SWAP(v2, v3); }
  if(ISAd[*v5] < ISAd[*v4]) { SWAP(v4, v5); }
  /* put the pair with the smaller low element first */
  if(ISAd[*v4] < ISAd[*v2]) {
    SWAP(v2, v4);
    SWAP(v3, v5);
  }
  /* bring v1 into play */
  if(ISAd[*v3] < ISAd[*v1]) { SWAP(v1, v3); }
  if(ISAd[*v4] < ISAd[*v1]) {
    SWAP(v1, v4);
    SWAP(v3, v5);
  }
  return (ISAd[*v4] < ISAd[*v3]) ? v4 : v3;
}
/* Chooses a pivot position in [first, last) by key ISAd[*p]:
     up to 32 entries   median of 3 (first, middle, last - 1)
     up to 512 entries  median of 5 evenly spread samples
     otherwise          median of three medians-of-3 */
static INLINE_divsuf
int *
tr_pivot(const int *ISAd, int *first, int *last) {
  int *middle;
  int *lo, *mid, *hi;
  int t;
  t = last - first;
  middle = first + t / 2;
  if(t <= 32) {
    return tr_median3(ISAd, first, middle, last - 1);
  }
  if(t <= 512) {
    t >>= 2;
    return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
  }
  t >>= 3;
  lo  = tr_median3(ISAd, first, first + t, first + (t << 1));
  mid = tr_median3(ISAd, middle - t, middle, middle + t);
  hi  = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
  return tr_median3(ISAd, lo, mid, hi);
}
/* Work budget shared by the tandem-repeat sorter. */
typedef struct _trbudget_t {
  int chance;  /* number of times the budget may still be topped up */
  int remain;  /* amount of work still allowed before a top-up is needed */
  int incval;  /* amount added to remain on every top-up */
  int count;   /* running total of the sizes that were refused */
} trbudget_t;
/* Sets up a budget: `chance` top-ups are available and both the current
   allowance and the top-up amount start at `incval`.  The count field is
   not touched here. */
static INLINE_divsuf
void
trbudget_init(trbudget_t *budget, int chance, int incval) {
  budget->chance = chance;
  budget->incval = incval;
  budget->remain = incval;
}
/* Asks the budget for `size` units of work.
   Returns 1 when the work may proceed (using one top-up if the current
   allowance is too small) and 0 when it is refused; refused sizes are
   added to budget->count. */
static INLINE_divsuf
int
trbudget_check(trbudget_t *budget, int size) {
  if(budget->remain < size) {
    if(budget->chance == 0) {
      /* nothing left to top up with: refuse and record the size */
      budget->count += size;
      return 0;
    }
    budget->remain += budget->incval - size;
    budget->chance -= 1;
    return 1;
  }
  budget->remain -= size;
  return 1;
}
/* Three-way partition of [first, last) by key ISAd[entry] around value v.
   Scanning starts at `middle`; entries in [first, middle) are not examined
   and are counted as equal to v (tr_introsort passes first + 1 after moving
   the pivot to *first).
   While scanning, entries equal to v are parked at the left end (up to a)
   and at the right end (from d), and are swapped into the centre at the end.
   On return [*pa, *pb) is the block of entries equal to v when at least one
   entry differs from v (a <= d); otherwise *pa = first and *pb = last. */
static INLINE_divsuf
void
tr_partition(const int *ISAd,
             int *first, int *middle, int *last,
             int **pa, int **pb, int v) {
  int *a, *b, *c, *d, *e, *f;
  int t, s;
  int x = 0;
  /* skip the leading run of entries equal to v */
  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
  if(((a = b) < last) && (x < v)) {
    /* advance b over entries <= v, parking the equal ones at a */
    for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
      if(x == v) { SWAP(*b, *a); ++a; }
    }
  }
  /* same from the right: skip the trailing run of entries equal to v ... */
  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
  if((b < (d = c)) && (x > v)) {
    /* ... and retreat c over entries >= v, parking the equal ones at d */
    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
      if(x == v) { SWAP(*c, *d); --d; }
    }
  }
  /* main loop: *b > v and *c < v, exchange them and keep scanning */
  for(; b < c;) {
    SWAP(*b, *c);
    for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
      if(x == v) { SWAP(*b, *a); ++a; }
    }
    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
      if(x == v) { SWAP(*c, *d); --d; }
    }
  }
  if(a <= d) {
    /* move the parked equal entries from both ends into the centre */
    c = b - 1;
    if((s = a - first) > (t = b - a)) { s = t; }
    for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
    if((s = d - c) > (t = last - d - 1)) { s = t; }
    for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
    first += (b - a), last -= (d - c);
  }
  *pa = first, *pb = last;
}
/* Fills the middle partition [a, b) from the already ordered left
   partition [first, a) and right partition [b, last).
   For every scanned entry e the candidate is e - depth; it belongs to the
   middle partition when it is non-negative and ISA[candidate] equals the
   group value b - SA - 1.  Candidates found scanning left-to-right are
   appended from a upwards (the scan also walks over the entries it just
   appended); candidates found scanning right-to-left from last - 1 are
   placed from b downwards.  ISA of every placed candidate is set to its new
   index in SA. */
static
void
tr_copy(int *ISA, const int *SA,
        int *first, int *a, int *b, int *last,
        int depth) {
  int *scan, *fill, *stop;
  int cand;
  const int group = b - SA - 1;

  /* left-to-right pass */
  fill = a - 1;
  for(scan = first; scan <= fill; ++scan) {
    cand = *scan - depth;
    if((cand < 0) || (ISA[cand] != group)) { continue; }
    ++fill;
    *fill = cand;
    ISA[cand] = fill - SA;
  }

  /* right-to-left pass fills the slots that are still open */
  stop = fill + 1;
  fill = b;
  for(scan = last - 1; stop < fill; --scan) {
    cand = *scan - depth;
    if((cand < 0) || (ISA[cand] != group)) { continue; }
    --fill;
    *fill = cand;
    ISA[cand] = fill - SA;
  }
}
/* Variant of tr_copy that assigns ranks per run instead of per entry.
   Candidates (entry - depth, non-negative, with ISA[candidate] equal to the
   group value b - SA - 1) are collected from the left partition upwards from
   a, and from the right partition downwards from b.  Consecutive candidates
   whose ISA[candidate + depth] is the same share one rank: the SA index of
   the first of them met in scan order.  Between the two passes the entries
   from the last one written by the left pass down to first are walked
   backwards and their ISA values are rewritten so that every run of equal
   ranks carries the index of its last entry. */
static
void
tr_partialcopy(int *ISA, const int *SA,
               int *first, int *a, int *b, int *last,
               int depth) {
  int *scan, *fill, *back;
  int cand;
  int cur, prev, assigned = -1;
  const int group = b - SA - 1;

  /* pass 1: left partition, filling upwards from a */
  prev = -1;
  fill = a - 1;
  for(scan = first; scan <= fill; ++scan) {
    cand = *scan - depth;
    if((cand < 0) || (ISA[cand] != group)) { continue; }
    *++fill = cand;
    cur = ISA[cand + depth];
    if(cur != prev) {
      prev = cur;
      assigned = fill - SA;
    }
    ISA[cand] = assigned;
  }

  /* pass 2: walk back to first, giving each run the index of its last entry */
  prev = -1;
  for(back = fill; first <= back; --back) {
    cur = ISA[*back];
    if(cur != prev) {
      prev = cur;
      assigned = back - SA;
    }
    if(assigned != cur) { ISA[*back] = assigned; }
  }

  /* pass 3: right partition, filling downwards from b */
  prev = -1;
  back = fill + 1;
  fill = b;
  for(scan = last - 1; back < fill; --scan) {
    cand = *scan - depth;
    if((cand < 0) || (ISA[cand] != group)) { continue; }
    *--fill = cand;
    cur = ISA[cand + depth];
    if(cur != prev) {
      prev = cur;
      assigned = fill - SA;
    }
    ISA[cand] = assigned;
  }
}
/* Introsort over [first, last) keyed by ISAd[entry], driven by an explicit
   stack instead of recursion.
   `limit` selects what happens to the current range:
     limit >= 0   ordinary step: insertion sort for small ranges, heapsort
                  once limit has counted down to 0, otherwise pivot selection
                  and three-way partition
     limit == -1  "tandem repeat partition": partition by the keys one
                  level up (ISAd - incr)
     limit == -2  "tandem repeat copy": finish a middle part with tr_copy or
                  tr_partialcopy
     limit == -3  "sorted partition": assign ranks in ISA and decide whether
                  groups of equal keys are refined at the next level
                  (ISAd + incr)
   `budget` caps the amount of refinement; when trbudget_check refuses, the
   group is left for a later pass and the pending tandem-repeat entry
   (stack[trlink]) is flagged with d = -1.
   NOTE(review): STACK_PUSH5 / STACK_POP5 are defined outside this block; the
   for(;;) loop has no other exit, so STACK_POP5 presumably returns from the
   function once the stack is empty -- confirm against the macro. */
static
void
tr_introsort(int *ISA, const int *ISAd,
             int *SA, int *first, int *last,
             trbudget_t *budget) {
#define STACK_SIZE TR_STACKSIZE
  struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
  int *a, *b, *c;
  int t;
  int v, x = 0;
  int incr = ISAd - ISA;
  int limit, next;
  int ssize, trlink = -1;
  for(ssize = 0, limit = tr_ilg(last - first);;) {
    if(limit < 0) {
      if(limit == -1) {
        /* tandem repeat partition */
        tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
        /* update ranks */
        if(a < last) {
          for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
        }
        if(b < last) {
          for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
        }
        /* push */
        if(1 < (b - a)) {
          /* two entries: the middle part [a, b) and a -2 marker that later
             triggers the tandem repeat copy; trlink remembers the first */
          STACK_PUSH5(NULL, a, b, 0, 0);
          STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
          trlink = ssize - 2;
        }
        /* continue with the smaller side, push the larger one */
        if((a - first) <= (last - b)) {
          if(1 < (a - first)) {
            STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
            last = a, limit = tr_ilg(a - first);
          } else if(1 < (last - b)) {
            first = b, limit = tr_ilg(last - b);
          } else {
            STACK_POP5(ISAd, first, last, limit, trlink);
          }
        } else {
          if(1 < (last - b)) {
            STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
            first = b, limit = tr_ilg(last - b);
          } else if(1 < (a - first)) {
            last = a, limit = tr_ilg(a - first);
          } else {
            STACK_POP5(ISAd, first, last, limit, trlink);
          }
        }
      } else if(limit == -2) {
        /* tandem repeat copy */
        /* the entry below the marker holds the middle part [a, b); its d
           field is 0 unless it was flagged with -1 in the meantime */
        a = stack[--ssize].b, b = stack[ssize].c;
        if(stack[ssize].d == 0) {
          tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
        } else {
          if(0 <= trlink) { stack[trlink].d = -1; }
          tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
        }
        STACK_POP5(ISAd, first, last, limit, trlink);
      } else {
        /* sorted partition */
        /* leading non-negative entries each get their own index as rank */
        if(0 <= *first) {
          a = first;
          do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
          first = a;
        }
        if(first < last) {
          /* a run of complemented entries plus the entry ending it forms one
             group of equal keys: restore the entries and rank the group */
          a = first; do { *a = ~*a; } while(*++a < 0);
          next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
          if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
          /* push */
          if(trbudget_check(budget, a - first)) {
            if((a - first) <= (last - a)) {
              STACK_PUSH5(ISAd, a, last, -3, trlink);
              ISAd += incr, last = a, limit = next;
            } else {
              if(1 < (last - a)) {
                STACK_PUSH5(ISAd + incr, first, a, next, trlink);
                first = a, limit = -3;
              } else {
                ISAd += incr, last = a, limit = next;
              }
            }
          } else {
            /* budget refused: skip the refinement of this group */
            if(0 <= trlink) { stack[trlink].d = -1; }
            if(1 < (last - a)) {
              first = a, limit = -3;
            } else {
              STACK_POP5(ISAd, first, last, limit, trlink);
            }
          }
        } else {
          STACK_POP5(ISAd, first, last, limit, trlink);
        }
      }
      continue;
    }
    /* small range: insertion sort, then treat it as a sorted partition */
    if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
      tr_insertionsort(ISAd, first, last);
      limit = -3;
      continue;
    }
    /* depth limit reached: heapsort, then complement all but the last entry
       of every run of equal keys, as the sorted-partition step expects */
    if(limit-- == 0) {
      tr_heapsort(ISAd, first, last - first);
      for(a = last - 1; first < a; a = b) {
        for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
      }
      limit = -3;
      continue;
    }
    /* choose pivot */
    a = tr_pivot(ISAd, first, last);
    SWAP(*first, *a);
    v = ISAd[*first];
    /* partition */
    tr_partition(ISAd, first, first + 1, last, &a, &b, v);
    if((last - first) != (b - a)) {
      /* [a, b) holds the entries equal to the pivot; next is the limit used
         when that part is refined at the next level */
      next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
      /* update ranks */
      for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
      if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
      /* push */
      /* three parts: [first, a), [a, b) at the next level, [b, last).
         The branches below order them by size: the smallest is processed
         next and the others are pushed; parts with at most one entry are
         dropped. */
      if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
        if((a - first) <= (last - b)) {
          if((last - b) <= (b - a)) {
            if(1 < (a - first)) {
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              STACK_PUSH5(ISAd, b, last, limit, trlink);
              last = a;
            } else if(1 < (last - b)) {
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              first = b;
            } else {
              ISAd += incr, first = a, last = b, limit = next;
            }
          } else if((a - first) <= (b - a)) {
            if(1 < (a - first)) {
              STACK_PUSH5(ISAd, b, last, limit, trlink);
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              last = a;
            } else {
              STACK_PUSH5(ISAd, b, last, limit, trlink);
              ISAd += incr, first = a, last = b, limit = next;
            }
          } else {
            STACK_PUSH5(ISAd, b, last, limit, trlink);
            STACK_PUSH5(ISAd, first, a, limit, trlink);
            ISAd += incr, first = a, last = b, limit = next;
          }
        } else {
          if((a - first) <= (b - a)) {
            if(1 < (last - b)) {
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              STACK_PUSH5(ISAd, first, a, limit, trlink);
              first = b;
            } else if(1 < (a - first)) {
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              last = a;
            } else {
              ISAd += incr, first = a, last = b, limit = next;
            }
          } else if((last - b) <= (b - a)) {
            if(1 < (last - b)) {
              STACK_PUSH5(ISAd, first, a, limit, trlink);
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              first = b;
            } else {
              STACK_PUSH5(ISAd, first, a, limit, trlink);
              ISAd += incr, first = a, last = b, limit = next;
            }
          } else {
            STACK_PUSH5(ISAd, first, a, limit, trlink);
            STACK_PUSH5(ISAd, b, last, limit, trlink);
            ISAd += incr, first = a, last = b, limit = next;
          }
        }
      } else {
        /* the middle part is not refined (single entry or budget refused):
           only the outer two parts remain */
        if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
        if((a - first) <= (last - b)) {
          if(1 < (a - first)) {
            STACK_PUSH5(ISAd, b, last, limit, trlink);
            last = a;
          } else if(1 < (last - b)) {
            first = b;
          } else {
            STACK_POP5(ISAd, first, last, limit, trlink);
          }
        } else {
          if(1 < (last - b)) {
            STACK_PUSH5(ISAd, first, a, limit, trlink);
            first = b;
          } else if(1 < (a - first)) {
            last = a;
          } else {
            STACK_POP5(ISAd, first, last, limit, trlink);
          }
        }
      }
    } else {
      /* every entry has the pivot key: refine the whole range at the next
         level if the budget allows */
      if(trbudget_check(budget, last - first)) {
        limit = tr_ilg(last - first), ISAd += incr;
      } else {
        if(0 <= trlink) { stack[trlink].d = -1; }
        STACK_POP5(ISAd, first, last, limit, trlink);
      }
    }
  }
#undef STACK_SIZE
}
/* Tandem repeat sort.
   Repeatedly sweeps SA[0..n), handing every group that is not yet fully
   sorted to tr_introsort; the comparison offset (ISAd - ISA) starts at
   `depth` and doubles after every sweep.  Finished stretches are recorded in
   SA as a negative length at their first slot so later sweeps can jump over
   them.  The sweeps stop when SA[0] marks the whole array as finished
   (*SA <= -n) or when a sweep leaves nothing unsorted. */
static
void
trsort(int *ISA, int *SA, int n, int depth) {
  int *ISAd;
  int *first, *last;
  trbudget_t budget;
  int t, skip, unsorted;

  /* budget: tr_ilg(n) * 2 / 3 top-ups of n units each */
  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);

  ISAd = ISA + depth;
  while(-n < *SA) {
    first = SA;
    skip = 0;
    unsorted = 0;
    do {
      t = *first;
      if(t < 0) {
        /* already finished stretch of -t entries: jump over it and let it
           join the stretch currently being accumulated */
        first -= t;
        skip += t;
        continue;
      }
      /* an unfinished group starts here: write out the accumulated stretch */
      if(skip != 0) {
        *(first + skip) = skip;
        skip = 0;
      }
      last = SA + ISA[t] + 1;
      if(1 < (last - first)) {
        budget.count = 0;
        tr_introsort(ISA, ISAd, SA, first, last, &budget);
        if(budget.count != 0) { unsorted += budget.count; }
        else { skip = first - last; }
      } else if((last - first) == 1) {
        skip = -1;
      }
      first = last;
    } while(first < (SA + n));
    if(skip != 0) { *(first + skip) = skip; }
    if(unsorted == 0) { break; }
    ISAd += ISAd - ISA;
  }
}
/* Sorts suffixes of type B*.
   Classifies every suffix of T[0..n) as type A, B or B* while scanning the
   text from right to left, fills the bucket counters, sorts the B* suffixes
   (sssort on the substrings, then trsort on the resulting ranks) and moves
   them to their final positions in SA.
   Returns m, the number of type B* suffixes.
   bucket_A / bucket_B are accessed through the BUCKET_A / BUCKET_B /
   BUCKET_BSTAR macros, which are defined outside this block. */
static
int
sort_typeBstar(const unsigned char *T, int *SA,
               int *bucket_A, int *bucket_B,
               int n) {
  int *PAb, *ISAb, *buf;
  int i, j, k, t, m, bufsize;
  int c0, c1;
  /* Initialize bucket arrays. */
  for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
  for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
  /* Count the number of occurrences of the first one or two characters of each
     type A, B and B* suffix. Moreover, store the beginning position of all
     type B* suffixes into the array SA. */
  for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
    /* type A suffix. */
    do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
    if(0 <= i) {
      /* type B* suffix. */
      ++BUCKET_BSTAR(c0, c1);
      SA[--m] = i;
      /* type B suffix. */
      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
        ++BUCKET_B(c0, c1);
      }
    }
  }
  /* the B* positions were stored downwards from SA[n - 1]; turn m into
     their count */
  m = n - m;
/*
note:
  A type B* suffix is lexicographically smaller than a type B suffix that
  begins with the same first two characters.
*/
  /* Calculate the index of start/end point of each bucket. */
  for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
    t = i + BUCKET_A(c0);
    BUCKET_A(c0) = i + j; /* start point */
    i = t + BUCKET_B(c0, c0);
    for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
      j += BUCKET_BSTAR(c0, c1);
      BUCKET_BSTAR(c0, c1) = j; /* end point */
      i += BUCKET_B(c0, c1);
    }
  }
  if(0 < m) {
    /* Sort the type B* suffixes by their first two characters. */
    /* PAb: the m stored B* positions at the end of SA; ISAb: rank array
       placed right behind the first m slots of SA */
    PAb = SA + n - m; ISAb = SA + m;
    for(i = m - 2; 0 <= i; --i) {
      t = PAb[i], c0 = T[t], c1 = T[t + 1];
      SA[--BUCKET_BSTAR(c0, c1)] = i;
    }
    /* the last B* suffix is placed after all others, so it ends up at the
       front of its bucket (sssort is told via its lastsuffix flag) */
    t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
    SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
    /* Sort the type B* substrings using sssort. */
    buf = SA + m, bufsize = n - (2 * m);
    for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
      for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
        i = BUCKET_BSTAR(c0, c1);
        if(1 < (j - i)) {
          sssort(T, PAb, SA + i, SA + j,
                 buf, bufsize, 2, n, *(SA + i) == (m - 1));
        }
      }
    }
    /* Compute ranks of type B* substrings. */
    for(i = m - 1; 0 <= i; --i) {
      if(0 <= SA[i]) {
        /* run of non-negative entries: each gets its own index as rank and
           the run's negated length is stored at its first slot */
        j = i;
        do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
        SA[i + 1] = i - j;
        if(i <= 0) { break; }
      }
      /* run of complemented entries plus the entry in front of them: all
         share rank j, and the complemented ones are restored */
      j = i;
      do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
      ISAb[SA[i]] = j;
    }
    /* Construct the inverse suffix array of type B* suffixes using trsort. */
    trsort(ISAb, SA, m, 1);
    /* Set the sorted order of type B* suffixes. */
    for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
      if(0 <= i) {
        t = i;
        for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
        /* the position is stored complemented unless it is 0 or the second
           scan above moved i by more than one step */
        SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
      }
    }
    /* Calculate the index of start/end point of each bucket. */
    BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
    for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
      i = BUCKET_A(c0 + 1) - 1;
      for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
        t = i - BUCKET_B(c0, c1);
        BUCKET_B(c0, c1) = i; /* end point */
        /* Move all type B* suffixes to the correct position. */
        for(i = t, j = BUCKET_BSTAR(c0, c1);
            j <= k;
            --i, --k) { SA[i] = SA[k]; }
      }
      BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
      BUCKET_B(c0, c0) = i; /* end point */
    }
  }
  return m;
}
/* Constructs the suffix array by using the sorted order of type B* suffixes.
     T        text of length n
     SA       on entry holds the sorted type B* suffixes at their bucket
              positions (as left by sort_typeBstar); on return holds the
              complete suffix array
     bucket_A, bucket_B
              bucket tables, accessed through the BUCKET_* macros
     m        number of type B* suffixes; the first phase is skipped when 0
   Entries are stored bit-complemented (negative) as markers between the two
   phases; the final left-to-right scan restores them.
   On an internal inconsistency (no bucket write cursor although a suffix has
   to be placed) a diagnostic is printed and the process terminates with a
   failure exit status. */
static
void
construct_SA(const unsigned char *T, int *SA,
             int *bucket_A, int *bucket_B,
             int n, int m) {
  int *i, *j, *k;
  int s;
  int c0, c1, c2;
  if(0 < m) {
    /* Construct the sorted order of type B suffixes by using
       the sorted order of type B* suffixes. */
    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
      /* Scan the suffix array from right to left. */
      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
          i <= j;
          --j) {
        if(0 < (s = *j)) {
          assert(T[s] == c1);
          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
          assert(T[s - 1] <= T[s]);
          *j = ~s;
          c0 = T[--s];
          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
          /* switch to the write cursor of bucket (c0, c1) when the
             preceding character changes, saving the old cursor first */
          if(c0 != c2) {
            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
            k = SA + BUCKET_B(c2 = c0, c1);
          }
          /* k is assigned on the first pass through the branch above
             (c2 starts at -1, c0 is a byte value); the check guards against
             an internal inconsistency and must not report success. */
          if(k == NULL) {
            myprintf("15845: k null!\n");
            exit(EXIT_FAILURE);
          }
          assert(k < j);
          *k-- = s;
        } else {
          assert(((s == 0) && (T[s] == c1)) || (s < 0));
          *j = ~s;
        }
      }
    }
  }
  /* Construct the suffix array by using
     the sorted order of type B suffixes. */
  k = SA + BUCKET_A(c2 = T[n - 1]);
  *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
  /* Scan the suffix array from left to right. */
  for(i = SA, j = SA + n; i < j; ++i) {
    if(0 < (s = *i)) {
      assert(T[s - 1] >= T[s]);
      c0 = T[--s];
      if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
      if(c0 != c2) {
        BUCKET_A(c2) = k - SA;
        k = SA + BUCKET_A(c2 = c0);
      }
      assert(i < k);
      *k++ = s;
    } else {
      assert(s < 0);
      *i = ~s;
    }
  }
}
/*- Function -*/
/* Builds the suffix array of T[0..n) into SA[0..n).
   Returns  0 on success,
           -1 when T or SA is NULL or n is negative,
           -2 when a bucket table could not be allocated.
   Inputs of length 0, 1 and 2 are answered directly without allocating. */
int
divsufsort(const unsigned char *T, int *SA, int n) {
  int *bucket_A, *bucket_B;
  int m;
  int err = 0;

  /* Check arguments. */
  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
  switch(n) {
    case 0:
      return 0;
    case 1:
      SA[0] = 0;
      return 0;
    case 2:
      m = (T[0] < T[1]);
      SA[m ^ 1] = 0;
      SA[m] = 1;
      return 0;
    default:
      break;
  }

  bucket_A = (int *)franz_malloc(BUCKET_A_SIZE * sizeof(int));
  bucket_B = (int *)franz_malloc(BUCKET_B_SIZE * sizeof(int));

  /* Suffixsort. */
  if((bucket_A == NULL) || (bucket_B == NULL)) {
    err = -2;
  } else {
    m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
    construct_SA(T, SA, bucket_A, bucket_B, n, m);
  }

  franz_free(bucket_B);
  franz_free(bucket_A);
  return err;
}
// End divsufsort.c
/// LICENSE_END.6


/////////////////////////////// add ///////////////////////////////////
// Format the non-negative number x in decimal, left-padded with '0' so the
// result has at least n digits. With x==0 and n==0 the result is empty.
std::string itos(int64_t x, int n=1) {
  assert(x>=0);
  assert(n>=0);
  std::string digits;
  while (x!=0 || n>0) {
    // least significant digit first, always inserted at the front
    digits.insert(digits.begin(), char('0'+x%10));
    x/=10;
    --n;
  }
  return digits;
}
// E8E9 transform of buf[0..n-1] to improve compression of .exe and .dll.
// Scanning backward from offset n-5, every 5-byte pattern
// (E8|E9 xx xx xx 00|FF) found at offset i gets its 3 middle bytes
// (a little-endian value x) replaced by (x+i) mod 2^24, LSB first.
// Buffers shorter than 5 bytes are left untouched.
void e8e9(unsigned char* buf, int n) {
  for (int pos=n-5; pos>=0; --pos) {
    if ((buf[pos]&254)!=0xe8) continue;        // first byte must be E8 or E9
    if (((buf[pos+4]+1)&254)!=0) continue;     // last byte must be 00 or FF
    unsigned addr=buf[pos+1]|buf[pos+2]<<8|buf[pos+3]<<16;
    addr+=pos;
    buf[pos+1]=addr;
    buf[pos+2]=addr>>8;
    buf[pos+3]=addr>>16;
  }
}
// LZBuffer is a libzpaq::Reader that produces the LZ77 or BWT encoding of
// inbuf on demand. Constructor arguments:
//   args[0]  log2 of the buffer size in MB.
//   args[1]  level (1=variable length LZ77, 2=byte aligned LZ77, 3=BWT),
//            plus 4 to request the E8E9 filter.
//   args[2]  LZ77 minimum match length, also the low context order.
//   args[3]  LZ77 higher context order to search first, or 0 for none.
//   args[4]  log2 of the hash bucket size (number of searches).
//   args[5]  log2 of the hash table size; a value of at least 21+args[0]
//            selects a suffix array instead of a hash table.
//   args[6]  look ahead of the secondary context.
//   sap      optional external suffix array of inbuf, or 0. When it is
//            supplied and args[1]=5..7, E8E9 is assumed to be applied
//            already to both the input and sap, and the input buffer is
//            left unmodified.
// NOTE: the data members below are initialized in declaration order by the
// constructor's initializer list; do not reorder them.
class LZBuffer: public libzpaq::Reader {
  libzpaq::Array<unsigned> ht;// hash table (check bits in the low bits), or SA+ISA
  const unsigned char* in;    // start of the input
  const int checkbits;        // hash confirmation bits, or lg(ISA size)
  const int level;            // 1=var length LZ77, 2=byte aligned LZ77, 3=BWT
  const unsigned htsize;      // number of elements in ht
  const unsigned n;           // number of input bytes
  unsigned i;                 // current position in the input (0 <= i < n)
  const unsigned minMatch;    // shortest match that may be coded
  const unsigned minMatch2;   // higher context order, 0 when unused
  const unsigned maxMatch;    // longest match that may be coded
  const unsigned maxLiteral;  // longest literal run before a forced flush
  const unsigned lookahead;   // look ahead of the higher order context
  unsigned h1, h2;            // low, high order context hashes of in[i..]
  const unsigned bucket;      // searches per hash, minus 1
  const unsigned shift1, shift2;  // per byte shift applied to h1, h2
  const int minMatchBoth;     // max(minMatch, minMatch2+lookahead)+4
  const unsigned rb;          // level 1: number of r bits in a match code
  unsigned bits;              // level 1: bits waiting to be written
  unsigned nbits;             // how many bits are waiting in bits
  unsigned rpos, wpos;        // read and write positions in buf
  unsigned idx;               // BWT index
  const unsigned* sa;         // suffix array for BWT or LZ77-SA
  unsigned* isa;              // inverse suffix array for LZ77-SA
  enum {BUFSIZE=1<<14};       // capacity of buf
  unsigned char buf[BUFSIZE]; // encoded output waiting to be read
  void write_literal(unsigned i, unsigned& lit);
  void write_match(unsigned len, unsigned off);
  void fill();  // encode more input into buf

  // Append the low k bits of x to the output bit stream, LSB first,
  // moving every completed byte into buf.
  void putb(unsigned x, int k) {
    const unsigned keep=(1<<k)-1;
    bits|=(x&keep)<<nbits;
    nbits+=k;
    for (; nbits>=8; nbits-=8) {
      assert(wpos<BUFSIZE);
      if (flagdebug5)
        myprintf("15900: wpos %d BUFSIZE %d\n",wpos,BUFSIZE);
      buf[wpos]=bits;
      ++wpos;
      bits>>=8;
    }
  }

  // Write out a final partial byte, if any, and clear the bit accumulator.
  void flush() {
    assert(wpos<BUFSIZE);
    if (nbits!=0) {
      buf[wpos]=bits;
      ++wpos;
    }
    nbits=0;
    bits=0;
  }

  // Append the single byte c to buf.
  void put(int c) {
    assert(wpos<BUFSIZE);
    buf[wpos]=c;
    ++wpos;
  }

public:
  LZBuffer(StringBuffer& inbuf, int args[], const unsigned* sap=0);

  // Return the next byte of compressed output, or -1 when none is left
  // (overrides Reader). Refills buf when it has been fully consumed.
  int get() {
    if (rpos==wpos) fill();
    const int c = rpos<wpos ? int(buf[rpos++]) : -1;
    if (rpos==wpos) rpos=wpos=0;
    return c;
  }

  // Read up to n bytes into p[0..n-1] and return the number of bytes read.
  int read(char* p, int n);
};
// LZ/BWT preprocessor for levels 1..3 compression and e8e9 filter.
// Level 1 uses variable length LZ77 codes like in the lazy compressor:
//
//   00,n,L[n] = n literal bytes
//   mm,mmm,n,ll,r,q (mm > 00) = match 4*n+ll at offset ((q-1)<<rb)+r+1
//
// where q is written in 8mm+mmm-8 (0..23) bits with an implied leading 1 bit
// and n is written using interleaved Elias Gamma coding, i.e. the leading
// 1 bit is implied, remaining bits are preceded by a 1 and terminated by
// a 0. e.g. abc is written 1,b,1,c,0. Codes are packed LSB first and
// padded with leading 0 bits in the last byte. r is a number with rb bits,
// where rb = log2(blocksize) - 24.
//
// Level 2 is byte oriented LZ77 with minimum match length m = $3 = args[2]
// with m in 1..64. Lengths and offsets are MSB first:
// 00xxxxxx   x+1 (1..64) literals follow
// yyxxxxxx   y+1 (2..4) offset bytes follow, match length x+m (m..m+63)
//
// Level 3 is BWT with the end of string byte coded as 255 and the
// last 4 bytes giving its position LSB first.
// Bit width of x: floor(log2(x)) + 1 for x > 0, and 0 for x == 0, i.e. the
// number of bits left after dropping leading zeros (0..32).
int lg(unsigned x) {
  // Width of every 4 bit value.
  static const unsigned char nibbleWidth[16]={0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4};
  int width=0;
  // Narrow x down to a nibble, counting the bits shifted out.
  if (x>>16) { width=16; x>>=16; }
  if (x>>8) { width+=8; x>>=8; }
  if (x>>4) { width+=4; x>>=4; }
  assert(x<16);
  return nibbleWidth[x]+width;
}
// Count the bits of x that are set to 1.
int nbits(unsigned x) {
  int count=0;
  while (x!=0) {
    x&=x-1;  // clears the lowest set bit
    ++count;
  }
  return count;
}
// Plain byte-by-byte stand-in for memcpy, meant to make buffer overflows
// show up in a gdb backtrace. Copies n bytes from src to dest and returns
// dest. A NULL dest or src is reported with a GURU message and the process
// is ended through exit(0); the NULL checks run even when n is 0.
void *alpine_memcpy(void *dest, const void *src, size_t n) 
{
	if (!dest)
	{
		myprintf("15965: GURU dest NULL\n");
		exit(0);
	}
	if (!src)
	{
		myprintf("15970: GURU src NULL\n");
		exit(0);
	}

	const unsigned char *from = (const unsigned char *)src;
	unsigned char *to = (unsigned char *)dest;

	// Nothing happens here when n is 0.
	for (size_t left=n; left>0; --left)
		*to++ = *from++;

	return dest;
}

// Copy up to n bytes of compressed output into p (overrides Reader).
// Returns how many bytes were copied, in 0..n; 0 signals EOF.
int LZBuffer::read(char* p, int n) 
{
  // Produce more output first when everything in buf has been consumed.
  if (rpos==wpos) fill();

  const int pending=int(wpos-rpos);
  const int nr = n>pending ? pending : n;

  if (nr)
  {
    // The original author noted that Alpine Linux "does not like" the plain
    // memcpy here (cause unknown), so with flagdebug5 set the copy goes
    // through the checked, byte-by-byte alpine_memcpy instead.
    if (flagdebug5)
      alpine_memcpy(p, buf+rpos, nr);
    else
      memcpy(p, buf+rpos, nr);
  }

  rpos+=nr;
  assert(rpos<=wpos);
  // Rewind both positions once the buffer is drained.
  if (rpos==wpos) rpos=wpos=0;
  return nr;
}
// Set up an encoder over inbuf.
//   inbuf  input block; modified in place by e8e9() when args[1]>4 and no
//          external suffix array is given.
//   args   method parameters args[0..6] (block size, level, match lengths,
//          bucket and table sizes, look ahead).
//   sap    optional externally built suffix array of inbuf, or 0.
// Calls error() when the minimum match length args[2] is below 4 at level 1
// or below 1 at level 2. For BWT (level 3) and for LZ77 with
// args[5]-args[0]>=21 a suffix array is needed: sap is used when given,
// otherwise one is built inside ht with divsufsort().
// The initializers follow the member declaration order of the class, so
// later ones (htsize, shift1, shift2, minMatchBoth) may read earlier members.
LZBuffer::LZBuffer(StringBuffer& inbuf, int args[], const unsigned* sap):
    // ht size: n+1 entries (0 if sap given) for BWT, 2^args[5] entries for
    // the hash table, else n entries (0 if sap given) plus 2^(17+args[0])
    // entries for the inverse suffix array window.
    ht((args[1]&3)==3 ? (inbuf.size()+1)*!sap      // for BWT suffix array
        : args[5]-args[0]<21 ? 1u<<args[5]         // for LZ77 hash table
        : (inbuf.size()*!sap)+(1u<<17<<args[0])),  // for LZ77 SA and ISA
    in(inbuf.data()),
    // hash table mode: 12-args[0] check bits; SA mode: lg of the ISA size
    checkbits(args[5]-args[0]<21 ? 12-args[0] : 17+args[0]),
    level(args[1]&3),
    htsize(ht.size()),
    n(inbuf.size()),
    i(0),
    minMatch(args[2]),
    minMatch2(args[3]),
    maxMatch(BUFSIZE*3),
    maxLiteral(BUFSIZE/4),
    lookahead(args[6]),
    h1(0), h2(0),
    bucket((1<<args[4])-1),
    shift1(minMatch>0 ? (args[5]-1)/minMatch+1 : 1),
    shift2(minMatch2>0 ? (args[5]-1)/minMatch2+1 : 0),
    minMatchBoth(MAX(minMatch, minMatch2+lookahead)+4),
    rb(args[0]>4 ? args[0]-4 : 0),
    bits(0), nbits(0), rpos(0), wpos(0),
    idx(0), sa(0), isa(0) {
  assert(args[0]>=0);
  assert(n<=(1u<<20<<args[0]));  // input must fit in a 2^args[0] MB block
  assert(args[1]>=1 && args[1]<=7 && args[1]!=4);
  assert(level>=1 && level<=3);
  if ((minMatch<4 && level==1) || (minMatch<1 && level==2))
    error("match length $3 too small");
  // e8e9 transform
  // (skipped when sap is given: the caller's suffix array must match the
  // bytes actually being encoded)
  if (args[1]>4 && !sap) e8e9(inbuf.data(), n);
  // build suffix array if not supplied
  if (args[5]-args[0]>=21 || level==3) {  // LZ77-SA or BWT
    if (sap)
      sa=sap;
    else {
      assert(ht.size()>=n);
      assert(ht.size()>0);
      sa=&ht[0];  // the suffix array occupies the first n entries of ht
      if (n>0) divsufsort((const unsigned char*)in, (int*)sa, n);
    }
    if (level<3) {
      assert(ht.size()>=(n*(sap==0))+(1u<<17<<args[0]));
      // the ISA window follows the internally built SA, or starts at ht[0]
      // when the SA is external
      isa=&ht[n*(sap==0)];
    }
  }
}
// Encode more input from in[i..] into buf. Called when buf has been fully
// read (rpos==wpos, both reset to 0).
//
// Level 3 (BWT): emits the byte preceding each sorted suffix, with the end
// of string coded as 255, followed by the 4 byte index idx LSB first
// (n+5 bytes in total over successive calls), stopping when buf is full.
//
// Levels 1 and 2 (LZ77): scans the input, picking at each position the
// best scoring match found either through the suffix array (isa != 0) or
// through the hash table ht, and writes literals/matches until the input
// ends or buf is at least half full. Pending literals are always written
// before buf grows, so none are lost between calls. At end of input the
// remaining literals are written and the bit buffer is flushed.
void LZBuffer::fill() {
  // BWT
  if (level==3) {
    assert(in || n==0);
    assert(sa);
    for (; wpos<BUFSIZE && i<n+5; ++i) {
      if (i==0) put(n>0 ? in[n-1] : 255);           // byte before suffix n
      else if (i>n) put(idx&255), idx>>=8;          // trailing 4 byte index
      else if (sa[i-1]==0) idx=i, put(255);         // end of string marker
      else put(in[sa[i-1]-1]);
    }
    return;
  }
  // LZ77: scan the input
  unsigned lit=0;  // number of output literals pending
  const unsigned mask=(1<<checkbits)-1;
  while (i<n && wpos*2<BUFSIZE) {
    // Search for longest match, or pick closest in case of tie
    unsigned blen=minMatch-1;  // best match length
    unsigned bp=0;  // pointer to best match
    unsigned blit=0;  // literals before best match
    int bscore=0;  // best cost
    // Look up contexts in suffix array
    if (isa) {
      // isa only covers the window of 2^checkbits positions that share the
      // high bits of i; refill it when i has moved into a new window.
      if (sa[isa[i&mask]]!=i) // rebuild ISA
        for (unsigned j=0; j<n; ++j)
          if ((sa[j]&~mask)==(i&~mask))
            isa[sa[j]&mask]=j;
      for (unsigned h=0; h<=lookahead; ++h) {
        unsigned q=isa[(h+i)&mask];  // location of h+i in SA
        assert(q<n);
        if (sa[q]!=h+i) continue;
        for (int j=-1; j<=1; j+=2) {  // search backward and forward
          for (unsigned k=1; k<=bucket; ++k) {
            unsigned p;  // match to be tested
            // q+j*k wraps around for out of range neighbours, which the
            // unsigned comparison against n rejects
            if (q+j*k<n && (p=sa[q+j*k]-h)<i) {
              assert(p<n);
              unsigned l, l1;  // length of match, leading literals
              for (l=h; i+l<n && l<maxMatch && in[p+l]==in[i+l]; ++l);
              for (l1=h; l1>0 && in[p+l1-1]==in[i+l1-1]; --l1);
              // 8 bits gained per matched byte, minus the cost of the
              // offset, of interrupting a literal run and of the code
              int score=int(l-l1)*8-lg(i-p)-4*(lit==0 && l1>0)-11;
              for (unsigned a=0; a<h; ++a) score=score*5/8;
              if (score>bscore) blen=l, bp=p, blit=l1, bscore=score;
              if (l<blen || l<minMatch || l>255) break;
            }
          }
        }
        if (bscore<=0 || blen<minMatch) break;
      }
    }
    // Look up contexts in a hash table.
    // Try the longest context orders first. If a match is found, then
    // skip the lower order as a speed optimization.
    // NOTE(review): with minMatch==1 (allowed at level 2) blen starts at 0,
    // so in[p+blen-1] below indexes with p-1 and wraps for p==0 — confirm
    // whether that configuration is reachable in practice.
    else if (level==1 || minMatch<=64) {
      if (minMatch2>0) {
        for (unsigned k=0; k<=bucket; ++k) {
          unsigned p=ht[h2^k];
          // i+3<n keeps the check byte read inside the input, exactly as
          // the lower order lookup below does; without it in[i+3] may be
          // read past the n input bytes near the end of the block.
          if (p && i+3<n && (p&mask)==(in[i+3]&mask)) {
            p>>=checkbits;
            if (p<i && i+blen<=n && in[p+blen-1]==in[i+blen-1]) {
              unsigned l;  // match length from lookahead
              for (l=lookahead; i+l<n && l<maxMatch && in[p+l]==in[i+l]; ++l);
              if (l>=minMatch2+lookahead) {
                int l1;  // length back from lookahead
                for (l1=lookahead; l1>0 && in[p+l1-1]==in[i+l1-1]; --l1);
                assert(l1>=0 && l1<=int(lookahead));
                int score=int(l-l1)*8-lg(i-p)-8*(lit==0 && l1>0)-11;
                if (score>bscore) blen=l, bp=p, blit=l1, bscore=score;
              }
            }
          }
          if (blen>=128) break;
        }
      }
      // Search the lower order context
      if (!minMatch2 || blen<minMatch2) {
        for (unsigned k=0; k<=bucket; ++k) {
          unsigned p=ht[h1^k];
          if (p && i+3<n && (p&mask)==(in[i+3]&mask)) {
            p>>=checkbits;
            if (p<i && i+blen<=n && in[p+blen-1]==in[i+blen-1]) {
              unsigned l;
              for (l=0; i+l<n && l<maxMatch && in[p+l]==in[i+l]; ++l);
              int score=l*8-lg(i-p)-2*(lit>0)-11;
              if (score>bscore) blen=l, bp=p, blit=0, bscore=score;
            }
          }
          if (blen>=128) break;
        }
      }
    }
    // If match is long enough, then output any pending literals first,
    // and then the match. blen is the length of the match.
    // At level 2 a longer offset costs extra bytes, so the required length
    // grows by one for offsets >= 2^16 and again for offsets >= 2^24.
    assert(i>=bp);
    const unsigned off=i-bp;  // offset
    if (off>0 && bscore>0
        && blen-blit>=minMatch+(level==2)*((off>=(1<<16))+(off>=(1<<24)))) {
      lit+=blit;
      write_literal(i+blit, lit);
      write_match(blen-blit, off);
    }
    // Otherwise add to literal length
    else {
      blen=1;
      ++lit;
    }
    // Update index, advance blen bytes
    if (isa)
      i+=blen;
    else {
      while (blen--) {
        // Only index positions with enough bytes after them for both
        // context hashes and for the check byte in[i+3].
        if (i+minMatchBoth<n) {
          unsigned ih=((i*1234547)>>19)&bucket;
          const unsigned p=(i<<checkbits)|(in[i+3]&mask);
          assert(ih<=bucket);
          if (minMatch2) {
            ht[h2^ih]=p;
            h2=(((h2*9)<<shift2)
                +(in[i+minMatch2+lookahead]+1)*23456789u)&(htsize-1);
          }
          ht[h1^ih]=p;	
		  ///alpine
/*
		  if (flagdebug5)
		  {
			  myprintf("16181: inzuppo i %d minMatch %d sum %d\n",i,minMatch,i+minMatch);
			  myprintf("16186: byte %03d %03d+1\n",in[i+minMatch] & 0xff,(in[i+minMatch]+1) & 0xff);
			  myprintf("16187: Z1   %d\n",((in[i+minMatch]+1)*123456791u)&(htsize-1));
			  myprintf("16188: Z2   %d\n",(h1*5)<<shift1);
		  }
*/
          h1=(((h1*5)<<shift1)+(in[i+minMatch]+1)*123456791u)&(htsize-1);
        }
        ++i;
      }
    }
    // Write long literals to keep buf from filling up
    if (lit>=maxLiteral)
      write_literal(i, lit);
  }
  // Write pending literals at end of input
  assert(i<=n);
  if (i==n) {
    write_literal(n, lit);
    flush();
  }
}
// Write literal sequence in[i-lit..i-1], set lit=0
void LZBuffer::write_literal(unsigned i, unsigned& lit) {
  assert(lit>=0);
  assert(i>=0 && i<=n);
  assert(i>=lit);
  if (level==1) {
    if (lit<1) return;
    int ll=lg(lit);
    assert(ll>=1 && ll<=24);
    putb(0, 2);
    --ll;
    while (--ll>=0) {
      putb(1, 1);
      putb((lit>>ll)&1, 1);
    }
    putb(0, 1);
    while (lit) putb(in[i-lit--], 8);
  }
  else {
    assert(level==2);
    while (lit>0) {
      unsigned lit1=lit;
      if (lit1>64) lit1=64;
      put(lit1-1);
      for (unsigned j=i-lit; j<i-lit+lit1; ++j) put(in[j]);
      lit-=lit1;
    }
  }
}
// Write match sequence of given length and offset
void LZBuffer::write_match(unsigned len, unsigned off) {
  // mm,mmm,n,ll,r,q[mmmmm-8] = match n*4+ll, offset ((q-1)<<rb)+r+1
  if (level==1) {
    assert(len>=minMatch && len<=maxMatch);
    assert(off>0);
    assert(len>=4);
    assert(rb>=0 && rb<=8);
    int ll=lg(len)-1;
    assert(ll>=2);
    off+=(1<<rb)-1;
    int lo=lg(off)-1-rb;
    assert(lo>=0 && lo<=23);
    putb((lo+8)>>3, 2);// mm
    putb(lo&7, 3);     // mmm
    while (--ll>=2) {  // n
      putb(1, 1);
      putb((len>>ll)&1, 1);
    }
    putb(0, 1);
    putb(len&3, 2);    // ll
    putb(off, rb);     // r
    putb(off>>rb, lo); // q
  }
  // x[2]:len[6] off[x-1]
  else {
    assert(level==2);
    assert(minMatch>=1 && minMatch<=64);
    --off;
    while (len>0) {  // Split long matches to len1=minMatch..minMatch+63
      const unsigned len1=len>minMatch*2+63 ? minMatch+63 :
          len>minMatch+63 ? len-minMatch : len;
      assert(wpos<BUFSIZE-5);
      assert(len1>=minMatch && len1<minMatch+64);
      if (off<(1<<16)) {
        put(64+len1-minMatch);
        put(off>>8);
        put(off);
      }
      else if (off<(1<<24)) {
        put(128+len1-minMatch);
        put(off>>16);
        put(off>>8);
        put(off);
      }
      else {
        put(192+len1-minMatch);
        put(off>>24);
        put(off>>16);
        put(off>>8);
        put(off);
      }
      len-=len1;
    }
  }
}
// Generate a config file from the method argument with syntax:
// {0|x|s|i}[N1[,N2]...][{ciamtswf<cfg>}[N1[,N2]]...]...
std::string makeConfig(const char* method, int args[]) {
  assert(method);
  const char type=method[0];
  assert(type=='x' || type=='s' || type=='0' || type=='i');
  // Read "{x|s|i|0}N1,N2...N9" into args[0..8] ($1..$9)
  args[0]=0;  // log block size in MiB
  args[1]=0;  // 0=none, 1=var-LZ77, 2=byte-LZ77, 3=BWT, 4..7 adds E8E9
  args[2]=0;  // lz77 minimum match length
  args[3]=0;  // secondary context length
  args[4]=0;  // log searches
  args[5]=0;  // lz77 hash table size or SA if args[0]+21
  args[6]=0;  // secondary context look ahead
  args[7]=0;  // not used
  args[8]=0;  // not used
  if (isdigit(*++method)) args[0]=0;
  for (int i=0; i<9 && (isdigit(*method) || *method==',' || *method=='.');) {
    if (isdigit(*method))
      args[i]=args[i]*10+*method-'0';
    else if (++i<9)
      args[i]=0;
    ++method;
  }
  // "0..." = No compression
  if (type=='0')
    return "comp 0 0 0 0 0 hcomp end\n";
  // Generate the postprocessor
  std::string hdr, pcomp;
  const int level=args[1]&3;
  const bool doe8=args[1]>=4 && args[1]<=7;
  // LZ77+Huffman, with or without E8E9
  if (level==1) {
    const int rb=args[0]>4 ? args[0]-4 : 0;
    hdr="comp 9 16 0 $1+20 ";
    pcomp=
    "pcomp lazy2 3 ;\n"
    " (r1 = state\n"
    "  r2 = len - match or literal length\n"
    "  r3 = m - number of offset bits expected\n"
    "  r4 = ptr to buf\n"
    "  r5 = r - low bits of offset\n"
    "  c = bits - input buffer\n"
    "  d = n - number of bits in c)\n"
    "\n"
    "  a> 255 if\n";
    if (doe8)
      pcomp+=
      "    b=0 d=r 4 do (for b=0..d-1, d = end of buf)\n"
      "      a=b a==d ifnot\n"
      "        a+= 4 a<d if\n"
      "          a=*b a&= 254 a== 232 if (e8 or e9?)\n"
      "            c=b b++ b++ b++ b++ a=*b a++ a&= 254 a== 0 if (00 or ff)\n"
      "              b-- a=*b\n"
      "              b-- a<<= 8 a+=*b\n"
      "              b-- a<<= 8 a+=*b\n"
      "              a-=b a++\n"
      "              *b=a a>>= 8 b++\n"
      "              *b=a a>>= 8 b++\n"
      "              *b=a b++\n"
      "            endif\n"
      "            b=c\n"
      "          endif\n"
      "        endif\n"
      "        a=*b out b++\n"
      "      forever\n"
      "    endif\n"
      "\n";
    pcomp+=
    "    (reset state)\n"
    "    a=0 b=0 c=0 d=0 r=a 1 r=a 2 r=a 3 r=a 4\n"
    "    halt\n"
    "  endif\n"
    "\n"
    "  a<<=d a+=c c=a               (bits+=a<<n)\n"
    "  a= 8 a+=d d=a                (n+=8)\n"
    "\n"
    "  (if state==0 (expect new code))\n"
    "  a=r 1 a== 0 if (match code mm,mmm)\n"
    "    a= 1 r=a 2                 (len=1)\n"
    "    a=c a&= 3 a> 0 if          (if (bits&3))\n"
    "      a-- a<<= 3 r=a 3           (m=((bits&3)-1)*8)\n"
    "      a=c a>>= 2 c=a             (bits>>=2)\n"
    "      b=r 3 a&= 7 a+=b r=a 3     (m+=bits&7)\n"
    "      a=c a>>= 3 c=a             (bits>>=3)\n"
    "      a=d a-= 5 d=a              (n-=5)\n"
    "      a= 1 r=a 1                 (state=1)\n"
    "    else (literal, discard 00)\n"
    "      a=c a>>= 2 c=a             (bits>>=2)\n"
    "      d-- d--                    (n-=2)\n"
    "      a= 3 r=a 1                 (state=3)\n"
    "    endif\n"
    "  endif\n"
    "\n"
    "  (while state==1 && n>=3 (expect match length n*4+ll -> r2))\n"
    "  do a=r 1 a== 1 if a=d a> 2 if\n"
    "    a=c a&= 1 a== 1 if         (if bits&1)\n"
    "      a=c a>>= 1 c=a             (bits>>=1)\n"
    "      b=r 2 a=c a&= 1 a+=b a+=b r=a 2 (len+=len+(bits&1))\n"
    "      a=c a>>= 1 c=a             (bits>>=1)\n"
    "      d-- d--                    (n-=2)\n"
    "    else\n"
    "      a=c a>>= 1 c=a             (bits>>=1)\n"
    "      a=r 2 a<<= 2 b=a           (len<<=2)\n"
    "      a=c a&= 3 a+=b r=a 2       (len+=bits&3)\n"
    "      a=c a>>= 2 c=a             (bits>>=2)\n"
    "      d-- d-- d--                (n-=3)\n";
    if (rb)
      pcomp+="      a= 5 r=a 1                 (state=5)\n";
    else
      pcomp+="      a= 2 r=a 1                 (state=2)\n";
    pcomp+=
    "    endif\n"
    "  forever endif endif\n"
    "\n";
    if (rb) pcomp+=  // save r in r5
      "  (if state==5 && n>=8) (expect low bits of offset to put in r5)\n"
      "  a=r 1 a== 5 if a=d a> "+itos(rb-1)+" if\n"
      "    a=c a&= "+itos((1<<rb)-1)+" r=a 5            (save r in r5)\n"
      "    a=c a>>= "+itos(rb)+" c=a\n"
      "    a=d a-= "+itos(rb)+ " d=a\n"
      "    a= 2 r=a 1                   (go to state 2)\n"
      "  endif endif\n"
      "\n";
    pcomp+=
    "  (if state==2 && n>=m) (expect m offset bits)\n"
    "  a=r 1 a== 2 if a=r 3 a>d ifnot\n"
    "    a=c r=a 6 a=d r=a 7          (save c=bits, d=n in r6,r7)\n"
    "    b=r 3 a= 1 a<<=b d=a         (d=1<<m)\n"
    "    a-- a&=c a+=d                (d=offset=bits&((1<<m)-1)|(1<<m))\n";
    if (rb)
      pcomp+=  // insert r into low bits of d
      "    a<<= "+itos(rb)+" d=r 5 a+=d a-= "+itos((1<<rb)-1)+"\n";
    pcomp+=
    "    d=a b=r 4 a=b a-=d c=a       (c=p=(b=ptr)-offset)\n"
    "\n"
    "    (while len-- (copy and output match d bytes from *c to *b))\n"
    "    d=r 2 do a=d a> 0 if d--\n"
    "      a=*c *b=a c++ b++          (buf[ptr++]-buf[p++])\n";
    if (!doe8) pcomp+=" out\n";
    pcomp+=
    "    forever endif\n"
    "    a=b r=a 4\n"
    "\n"
    "    a=r 6 b=r 3 a>>=b c=a        (bits>>=m)\n"
    "    a=r 7 a-=b d=a               (n-=m)\n"
    "    a=0 r=a 1                    (state=0)\n"
    "  endif endif\n"
    "\n"
    "  (while state==3 && n>=2 (expect literal length))\n"
    "  do a=r 1 a== 3 if a=d a> 1 if\n"
    "    a=c a&= 1 a== 1 if         (if bits&1)\n"
    "      a=c a>>= 1 c=a              (bits>>=1)\n"
    "      b=r 2 a&= 1 a+=b a+=b r=a 2 (len+=len+(bits&1))\n"
    "      a=c a>>= 1 c=a              (bits>>=1)\n"
    "      d-- d--                     (n-=2)\n"
    "    else\n"
    "      a=c a>>= 1 c=a              (bits>>=1)\n"
    "      d--                         (--n)\n"
    "      a= 4 r=a 1                  (state=4)\n"
    "    endif\n"
    "  forever endif endif\n"
    "\n"
    "  (if state==4 && n>=8 (expect len literals))\n"
    "  a=r 1 a== 4 if a=d a> 7 if\n"
    "    b=r 4 a=c *b=a\n";
    if (!doe8) pcomp+=" out\n";
    pcomp+=
    "    b++ a=b r=a 4                 (buf[ptr++]=bits)\n"
    "    a=c a>>= 8 c=a                (bits>>=8)\n"
    "    a=d a-= 8 d=a                 (n-=8)\n"
    "    a=r 2 a-- r=a 2 a== 0 if      (if --len<1)\n"
    "      a=0 r=a 1                     (state=0)\n"
    "    endif\n"
    "  endif endif\n"
    "  halt\n"
    "end\n";
  }
  // Byte aligned LZ77, with or without E8E9
  else if (level==2) {
    hdr="comp 9 16 0 $1+20 ";
    pcomp=
    "pcomp lzpre c ;\n"
    "  (Decode LZ77: d=state, M=output buffer, b=size)\n"
    "  a> 255 if (at EOF decode e8e9 and output)\n";
    if (doe8)
      pcomp+=
      "    d=b b=0 do (for b=0..d-1, d = end of buf)\n"
      "      a=b a==d ifnot\n"
      "        a+= 4 a<d if\n"
      "          a=*b a&= 254 a== 232 if (e8 or e9?)\n"
      "            c=b b++ b++ b++ b++ a=*b a++ a&= 254 a== 0 if (00 or ff)\n"
      "              b-- a=*b\n"
      "              b-- a<<= 8 a+=*b\n"
      "              b-- a<<= 8 a+=*b\n"
      "              a-=b a++\n"
      "              *b=a a>>= 8 b++\n"
      "              *b=a a>>= 8 b++\n"
      "              *b=a b++\n"
      "            endif\n"
      "            b=c\n"
      "          endif\n"
      "        endif\n"
      "        a=*b out b++\n"
      "      forever\n"
      "    endif\n";
    pcomp+=
    "    b=0 c=0 d=0 a=0 r=a 1 r=a 2 (reset state)\n"
    "  halt\n"
    "  endif\n"
    "\n"
    "  (in state d==0, expect a new code)\n"
    "  (put length in r1 and initial part of offset in r2)\n"
    "  c=a a=d a== 0 if\n"
    "    a=c a>>= 6 a++ d=a\n"
    "    a== 1 if (literal?)\n"
    "      a+=c r=a 1 a=0 r=a 2\n"
    "    else (3 to 5 byte match)\n"
    "      d++ a=c a&= 63 a+= $3 r=a 1 a=0 r=a 2\n"
    "    endif\n"
    "  else\n"
    "    a== 1 if (writing literal)\n"
    "      a=c *b=a b++\n";
    if (!doe8) pcomp+=" out\n";
    pcomp+=
    "      a=r 1 a-- a== 0 if d=0 endif r=a 1 (if (--len==0) state=0)\n"
    "    else\n"
    "      a> 2 if (reading offset)\n"
    "        a=r 2 a<<= 8 a|=c r=a 2 d-- (off=off<<8|c, --state)\n"
    "      else (state==2, write match)\n"
    "        a=r 2 a<<= 8 a|=c c=a a=b a-=c a-- c=a (c=i-off-1)\n"
    "        d=r 1 (d=len)\n"
    "        do (copy and output d=len bytes)\n"
    "          a=*c *b=a c++ b++\n";
    if (!doe8) pcomp+=" out\n";
    pcomp+=
    "        d-- a=d a> 0 while\n"
    "        (d=state=0. off, len don\'t matter)\n"
    "      endif\n"
    "    endif\n"
    "  endif\n"
    "  halt\n"
    "end\n";
  }
  // BWT with or without E8E9
  else if (level==3) {  // IBWT
    hdr="comp 9 16 $1+20 $1+20 ";  // 2^$1 = block size in MB
    pcomp=
    "pcomp bwtrle c ;\n"
    "\n"
    "  (read BWT, index into M, size in b)\n"
    "  a> 255 ifnot\n"
    "    *b=a b++\n"
    "\n"
    "  (inverse BWT)\n"
    "  elsel\n"
    "\n"
    "    (index in last 4 bytes, put in c and R1)\n"
    "    b-- a=*b\n"
    "    b-- a<<= 8 a+=*b\n"
    "    b-- a<<= 8 a+=*b\n"
    "    b-- a<<= 8 a+=*b c=a r=a 1\n"
    "\n"
    "    (save size in R2)\n"
    "    a=b r=a 2\n"
    "\n"
    "    (count bytes in H[~1..~255, ~0])\n"
    "    do\n"
    "      a=b a> 0 if\n"
    "        b-- a=*b a++ a&= 255 d=a d! *d++\n"
    "      forever\n"
    "    endif\n"
    "\n"
    "    (cumulative counts: H[~i=0..255] = count of bytes before i)\n"
    "    d=0 d! *d= 1 a=0\n"
    "    do\n"
    "      a+=*d *d=a d--\n"
    "    d<>a a! a> 255 a! d<>a until\n"
    "\n"
    "    (build first part of linked list in H[0..idx-1])\n"
    "    b=0 do\n"
    "      a=c a>b if\n"
    "        d=*b d! *d++ d=*d d-- *d=b\n"
    "      b++ forever\n"
    "    endif\n"
    "\n"
    "    (rest of list in H[idx+1..n-1])\n"
    "    b=c b++ c=r 2 do\n"
    "      a=c a>b if\n"
    "        d=*b d! *d++ d=*d d-- *d=b\n"
    "      b++ forever\n"
    "    endif\n"
    "\n";
    if (args[0]<=4) {  // faster IBWT list traversal limited to 16 MB blocks
      pcomp+=
      "    (copy M to low 8 bits of H to reduce cache misses in next loop)\n"
      "    b=0 do\n"
      "      a=c a>b if\n"
      "        d=b a=*d a<<= 8 a+=*b *d=a\n"
      "      b++ forever\n"
      "    endif\n"
      "\n"
      "    (traverse list and output or copy to M)\n"
      "    d=r 1 b=0 do\n"
      "      a=d a== 0 ifnot\n"
      "        a=*d a>>= 8 d=a\n";
      if (doe8) pcomp+=" *b=*d b++\n";
      else      pcomp+=" a=*d out\n";
      pcomp+=
      "      forever\n"
      "    endif\n"
      "\n";
      if (doe8)  // IBWT+E8E9
        pcomp+=
        "    (e8e9 transform to out)\n"
        "    d=b b=0 do (for b=0..d-1, d = end of buf)\n"
        "      a=b a==d ifnot\n"
        "        a+= 4 a<d if\n"
        "          a=*b a&= 254 a== 232 if\n"
        "            c=b b++ b++ b++ b++ a=*b a++ a&= 254 a== 0 if\n"
        "              b-- a=*b\n"
        "              b-- a<<= 8 a+=*b\n"
        "              b-- a<<= 8 a+=*b\n"
        "              a-=b a++\n"
        "              *b=a a>>= 8 b++\n"
        "              *b=a a>>= 8 b++\n"
        "              *b=a b++\n"
        "            endif\n"
        "            b=c\n"
        "          endif\n"
        "        endif\n"
        "        a=*b out b++\n"
        "      forever\n"
        "    endif\n";
      pcomp+=
      "  endif\n"
      "  halt\n"
      "end\n";
    }
    else {  // slower IBWT list traversal for all sized blocks
      if (doe8) {  // E8E9 after IBWT
        pcomp+=
        "    (R2 = output size without EOS)\n"
        "    a=r 2 a-- r=a 2\n"
        "\n"
        "    (traverse list (d = IBWT pointer) and output inverse e8e9)\n"
        "    (C = offset = 0..R2-1)\n"
        "    (R4 = last 4 bytes shifted in from MSB end)\n"
        "    (R5 = temp pending output byte)\n"
        "    c=0 d=r 1 do\n"
        "      a=d a== 0 ifnot\n"
        "        d=*d\n"
        "\n"
        "        (store byte in R4 and shift out to R5)\n"
        "        b=d a=*b a<<= 24 b=a\n"
        "        a=r 4 r=a 5 a>>= 8 a|=b r=a 4\n"
        "\n"
        "        (if E8|E9 xx xx xx 00|FF in R4:R5 then subtract c from x)\n"
        "        a=c a> 3 if\n"
        "          a=r 5 a&= 254 a== 232 if\n"
        "            a=r 4 a>>= 24 b=a a++ a&= 254 a< 2 if\n"
        "              a=r 4 a-=c a+= 4 a<<= 8 a>>= 8 \n"
        "              b<>a a<<= 24 a+=b r=a 4\n"
        "            endif\n"
        "          endif\n"
        "        endif\n"
        "\n"
        "        (output buffered byte)\n"
        "        a=c a> 3 if a=r 5 out endif c++\n"
        "\n"
        "      forever\n"
        "    endif\n"
        "\n"
        "    (output up to 4 pending bytes in R4)\n"
        "    b=r 4\n"
        "    a=c a> 3 a=b if out endif a>>= 8 b=a\n"
        "    a=c a> 2 a=b if out endif a>>= 8 b=a\n"
        "    a=c a> 1 a=b if out endif a>>= 8 b=a\n"
        "    a=c a> 0 a=b if out endif\n"
        "\n"
        "  endif\n"
        "  halt\n"
        "end\n";
      }
      else {
        pcomp+=
        "    (traverse list and output)\n"
        "    d=r 1 do\n"
        "      a=d a== 0 ifnot\n"
        "        d=*d\n"
        "        b=d a=*b out\n"
        "      forever\n"
        "    endif\n"
        "  endif\n"
        "  halt\n"
        "end\n";
      }
    }
  }
  // E8E9 or no preprocessing
  else if (level==0) {
    hdr="comp 9 16 0 0 ";
    if (doe8) { // E8E9?
      pcomp=
      "pcomp e8e9 d ;\n"
      "  a> 255 if\n"
      "    a=c a> 4 if\n"
      "      c= 4\n"
      "    else\n"
      "      a! a+= 5 a<<= 3 d=a a=b a>>=d b=a\n"
      "    endif\n"
      "    do a=c a> 0 if\n"
      "      a=b out a>>= 8 b=a c--\n"
      "    forever endif\n"
      "  else\n"
      "    *b=b a<<= 24 d=a a=b a>>= 8 a+=d b=a c++\n"
      "    a=c a> 4 if\n"
      "      a=*b out\n"
      "      a&= 254 a== 232 if\n"
      "        a=b a>>= 24 a++ a&= 254 a== 0 if\n"
      "          a=b a>>= 24 a<<= 24 d=a\n"
      "          a=b a-=c a+= 5\n"
      "          a<<= 8 a>>= 8 a|=d b=a\n"
      "        endif\n"
      "      endif\n"
      "    endif\n"
      "  endif\n"
      "  halt\n"
      "end\n";
    }
    else
      pcomp="end\n";
  }
  else
    error("Unsupported method");
  // Build context model (comp, hcomp) assuming:
  // H[0..254] = contexts
  // H[255..511] = location of last byte i-255
  // M = last 64K bytes, filling backward
  // C = pointer to most recent byte
  // R1 = level 2 lz77 1+bytes expected until next code, 0=init
  // R2 = level 2 lz77 first byte of code
  int ncomp=0;  // number of components
  const int membits=args[0]+20;
  int sb=5;  // bits in last context
  std::string comp;
  std::string hcomp="hcomp\n"
    "c-- *c=a a+= 255 d=a *d=c\n";
  if (level==2) {  // put level 2 lz77 parse state in R1, R2
    hcomp+=
    "  (decode lz77 into M. Codes:\n"
    "  00xxxxxx = literal length xxxxxx+1\n"
    "  xx......, xx > 0 = match with xx offset bytes to follow)\n"
    "\n"
    "  a=r 1 a== 0 if (init)\n"
    "    a= "+itos(111+57*doe8)+" (skip post code)\n"
    "  else a== 1 if  (new code?)\n"
    "    a=*c r=a 2  (save code in R2)\n"
    "    a> 63 if a>>= 6 a++ a++  (match)\n"
    "    else a++ a++ endif  (literal)\n"
    "  else (read rest of code)\n"
    "    a--\n"
    "  endif endif\n"
    "  r=a 1  (R1 = 1+expected bytes to next code)\n";
  }
  // Generate the context model
  while (*method && ncomp<254) {
    // parse command C[N1[,N2]...] into v = {C, N1, N2...}
    std::vector<int> v;
    v.push_back(*method++);
    if (isdigit(*method)) {
      v.push_back(*method++-'0');
      while (isdigit(*method) || *method==',' || *method=='.') {
        if (isdigit(*method))
          v.back()=v.back()*10+*method++-'0';
        else {
          v.push_back(0);
          ++method;
        }
      }
    }
    // c: context model
    // N1%1000: 0=ICM 1..256=CM limit N1-1
    // N1/1000: number of times to halve memory
    // N2: 1..255=offset mod N2. 1000..1255=distance to N2-1000
    // N3...: 0..255=byte mask + 256=lz77 state. 1000+=run of N3-1000 zeros.
    if (v[0]=='c') {
      while (v.size()<3) v.push_back(0);
      comp+=itos(ncomp)+" ";
      sb=11;  // count context bits
      if (v[2]<256) sb+=lg(v[2]);
      else sb+=6;
      for (unsigned i=3; i<v.size(); ++i)
        if (v[i]<512) sb+=nbits(v[i])*3/4;
      if (sb>membits) sb=membits;
      if (v[1]%1000==0) comp+="icm "+itos(sb-6-v[1]/1000)+"\n";
      else comp+="cm "+itos(sb-2-v[1]/1000)+" "+itos(v[1]%1000-1)+"\n";
      // special contexts
      hcomp+="d= "+itos(ncomp)+" *d=0\n";
      if (v[2]>1 && v[2]<=255) {  // periodic context
        if (lg(v[2])!=lg(v[2]-1))
          hcomp+="a=c a&= "+itos(v[2]-1)+" hashd\n";
        else
          hcomp+="a=c a%= "+itos(v[2])+" hashd\n";
      }
      else if (v[2]>=1000 && v[2]<=1255)  // distance context
        hcomp+="a= 255 a+= "+itos(v[2]-1000)+
               " d=a a=*d a-=c a> 255 if a= 255 endif d= "+
               itos(ncomp)+" hashd\n";
      // Masked context
      for (unsigned i=3; i<v.size(); ++i) {
        if (i==3) hcomp+="b=c ";
        if (v[i]==255)
          hcomp+="a=*b hashd\n";  // ordinary byte
        else if (v[i]>0 && v[i]<255)
          hcomp+="a=*b a&= "+itos(v[i])+" hashd\n";  // masked byte
        else if (v[i]>=256 && v[i]<512) { // lz77 state or masked literal byte
          hcomp+=
          "a=r 1 a> 1 if\n"  // expect literal or offset
          "  a=r 2 a< 64 if\n"  // expect literal
          "    a=*b ";
          if (v[i]<511) hcomp+="a&= "+itos(v[i]-256);
          hcomp+=" hashd\n"
          "  else\n"  // expect match offset byte
          "    a>>= 6 hashd a=r 1 hashd\n"
          "  endif\n"
          "else\n"  // expect new code
          "  a= 255 hashd a=r 2 hashd\n"
          "endif\n";
        }
        else if (v[i]>=1256)  // skip v[i]-1000 bytes
          hcomp+="a= "+itos(((v[i]-1000)>>8)&255)+" a<<= 8 a+= "
               +itos((v[i]-1000)&255)+
          " a+=b b=a\n";
        else if (v[i]>1000)
          hcomp+="a= "+itos(v[i]-1000)+" a+=b b=a\n";
///        if (v[i]<512 && i<v.size()-1)
        if (i<v.size()-1)
			if (v[i]<512)
          hcomp+="b++ ";
      }
      ++ncomp;
    }
    // m,8,24: MIX, size, rate
    // t,8,24: MIX2, size, rate
    // s,8,32,255: SSE, size, start, limit
    if (strchr("mts", v[0]) && ncomp>int(v[0]=='t')) {
      if (v.size()<=1) v.push_back(8);
      if (v.size()<=2) v.push_back(24+8*(v[0]=='s'));
      if (v[0]=='s' && v.size()<=3) v.push_back(255);
      comp+=itos(ncomp);
      sb=5+v[1]*3/4;
      if (v[0]=='m')
        comp+=" mix "+itos(v[1])+" 0 "+itos(ncomp)+" "+itos(v[2])+" 255\n";
      else if (v[0]=='t')
        comp+=" mix2 "+itos(v[1])+" "+itos(ncomp-1)+" "+itos(ncomp-2)
            +" "+itos(v[2])+" 255\n";
      else // s
        comp+=" sse "+itos(v[1])+" "+itos(ncomp-1)+" "+itos(v[2])+" "
            +itos(v[3])+"\n";
      if (v[1]>8) {
        hcomp+="d= "+itos(ncomp)+" *d=0 b=c a=0\n";
        for (; v[1]>=16; v[1]-=8) {
          hcomp+="a<<= 8 a+=*b";
          if (v[1]>16) hcomp+=" b++";
          hcomp+="\n";
        }
        if (v[1]>8)
          hcomp+="a<<= 8 a+=*b a>>= "+itos(16-v[1])+"\n";
        hcomp+="a<<= 8 *d=a\n";
      }
      ++ncomp;
    }
    // i: ISSE chain with order increasing by N1,N2...
    if (v[0]=='i' && ncomp>0) {
      assert(sb>=5);
      hcomp+="d= "+itos(ncomp-1)+" b=c a=*d d++\n";
      for (unsigned i=1; i<v.size() && ncomp<254; ++i) {
        for (int j=0; j<v[i]%10; ++j) {
          hcomp+="hash ";
          if (i<v.size()-1 || j<v[i]%10-1) hcomp+="b++ ";
          sb+=6;
        }
        hcomp+="*d=a";
        if (i<v.size()-1) hcomp+=" d++";
        hcomp+="\n";
        if (sb>membits) sb=membits;
        comp+=itos(ncomp)+" isse "+itos(sb-6-v[i]/10)+" "+itos(ncomp-1)+"\n";
        ++ncomp;
      }
    }
    // a24,0,0: MATCH. N1=hash multiplier. N2,N3=halve buf, table.
    if (v[0]=='a') {
      if (v.size()<=1) v.push_back(24);
      while (v.size()<4) v.push_back(0);
      comp+=itos(ncomp)+" match "+itos(membits-v[3]-2)+" "
          +itos(membits-v[2])+"\n";
      hcomp+="d= "+itos(ncomp)+" a=*d a*= "+itos(v[1])
           +" a+=*c a++ *d=a\n";
      sb=5+(membits-v[2])*3/4;
      ++ncomp;
    }
    // w1,65,26,223,20,0: ICM-ISSE chain of length N1 with word contexts,
    // where a word is a sequence of c such that c&N4 is in N2..N2+N3-1.
    // Word is hashed by: hash := hash*N5+c+1
    // Decrease memory by 2^-N6.
    if (v[0]=='w') {
      if (v.size()<=1) v.push_back(1);
      if (v.size()<=2) v.push_back(65);
      if (v.size()<=3) v.push_back(26);
      if (v.size()<=4) v.push_back(223);
      if (v.size()<=5) v.push_back(20);
      if (v.size()<=6) v.push_back(0);
      comp+=itos(ncomp)+" icm "+itos(membits-6-v[6])+"\n";
      for (int i=1; i<v[1]; ++i)
        comp+=itos(ncomp+i)+" isse "+itos(membits-6-v[6])+" "
            +itos(ncomp+i-1)+"\n";
      hcomp+="a=*c a&= "+itos(v[4])+" a-= "+itos(v[2])+" a&= 255 a< "
           +itos(v[3])+" if\n";
      for (int i=0; i<v[1]; ++i) {
        if (i==0) hcomp+="  d= "+itos(ncomp);
        else hcomp+="  d++";
        hcomp+=" a=*d a*= "+itos(v[5])+" a+=*c a++ *d=a\n";
      }
      hcomp+="else\n";
      for (int i=v[1]-1; i>0; --i)
        hcomp+="  d= "+itos(ncomp+i-1)+" a=*d d++ *d=a\n";
      hcomp+="  d= "+itos(ncomp)+" *d=0\n"
           "endif\n";
      ncomp+=v[1]-1;
      sb=membits-v[6];
      ++ncomp;
    }
  }
  return hdr+itos(ncomp)+"\n"+comp+hcomp+"halt\n"+pcomp;
}
// Compress from in to out in 1 segment in 1 block using the algorithm
// described in method. If method begins with a digit then choose
// a method depending on type. Save filename and comment
// in the segment header. If comment is 0 then the default is the input size
// as a decimal string, plus " jDC\x01" for a journaling method (method[0]
// is not 's'). Write the generated method to methodOut if not 0.
// Parameters (as used in the body below):
//   in        input buffer; in->size() bytes are compressed. It is modified
//             in place by e8e9() when the preprocessing code args[1] is 4.
//   out       destination handed to Compressor::setOutput().
//   method_   non-empty method string. A leading digit selects a built-in
//             level that is expanded below into an explicit method string.
//   filename  passed to startSegment() unchanged.
//   comment   if not 0, appended (after a space) to the decimal input size
//             to form the segment comment.
//   dosha1    when true (always, in DEBUG builds) the SHA-1 of the input
//             is computed before compression.
void compressBlock(StringBuffer* in, Writer* out, const char* method_,
                   const char* filename, const char* comment, bool dosha1) {
  assert(in);
  assert(out);
  assert(method_);
  assert(method_[0]);
  std::string method=method_;
  const unsigned n=in->size();  // input size
  // The assert below states the intent of arg0: 2^(arg0+20) >= n+4096.
  const int arg0=MAX(lg(n+4095)-20, 0);  // block size
  assert((1u<<(arg0+20))>=n+4096);
  // Get type from method "LB,R,t" where L is level 0..5, B is block
  // size 0..11, R is redundancy 0..255, t = 0..3 = binary, text, exe, both.
  unsigned type=0;
  if (isdigit(method[0])) {
    // arg[k] accumulates the decimal digits of the k-th field; fields are
    // separated by ',' or '.' and scanning starts after the level digit.
    int commas=0, arg[4]={0};
    for (int i=1; i<int(method.size()) && commas<4; ++i) {
      if (method[i]==',' || method[i]=='.') ++commas;
      else if (isdigit(method[i])) arg[commas]=arg[commas]*10+method[i]-'0';
    }
    // No separator at all: use 512; otherwise combine redundancy and type.
    if (commas==0) type=512;
    else type=arg[1]*4+arg[2];
  }
  // Get hash of input
  libzpaq::SHA1 sha1;
  const char* sha1ptr=0;  // stays 0 when the hash is not computed
#ifdef DEBUG
  if (true) {
#else
  if (dosha1) {
#endif
    sha1.write(in->c_str(), n);
    sha1ptr=sha1.result();
  }
  // Expand default methods
  if (isdigit(method[0])) {
    const int level=method[0]-'0';
    assert(level>=0 && level<=9);
    // build models
    // doe8 is 4 when bit 1 of type is set, otherwise 0; it is added to the
    // preprocessing code written into the method string below.
    const int doe8=(type&2)*2;
    method="x"+itos(arg0);
    std::string htsz=","+itos(19+arg0+(arg0<=6));  // lz77 hash table size
    std::string sasz=","+itos(21+arg0);            // lz77 suffix array size
    // store uncompressed
    if (level==0)
      method="0"+itos(arg0)+",0";
    // LZ77, no model. Store if hard to compress
    else if (level==1) {
      if (type<40) method+=",0";
      else {
        method+=","+itos(1+doe8)+",";
        if      (type<80)  method+="4,0,1,15";
        else if (type<128) method+="4,0,2,16";
        else if (type<256) method+="4,0,2"+htsz;
        else if (type<960) method+="5,0,3"+htsz;
        else               method+="6,0,3"+htsz;
      }
    }
    // LZ77 with longer search
    else if (level==2) {
      if (type<32) method+=",0";
      else {
        method+=","+itos(1+doe8)+",";
        if (type<64) method+="4,0,3"+htsz;
        else method+="4,0,7"+sasz+",1";
      }
    }
    // LZ77 with CM depending on redundancy
    else if (level==3) {
      if (type<20)  // store if not compressible
        method+=",0";
      else if (type<48)  // fast LZ77 if barely compressible
        method+=","+itos(1+doe8)+",4,0,3"+htsz;
      else if (type>=640 || (type&1))  // BWT if text or highly compressible
        method+=","+itos(3+doe8)+"ci1";
      else  // LZ77 with O0-1 compression of up to 12 literals
        method+=","+itos(2+doe8)+",12,0,7"+sasz+",1c0,0,511i2";
    }
    // LZ77+CM, fast CM, or BWT depending on type
    else if (level==4) {
      if (type<12)
        method+=",0";
      else if (type<24)
        method+=","+itos(1+doe8)+",4,0,3"+htsz;
      else if (type<48)
        method+=","+itos(2+doe8)+",5,0,7"+sasz+"1c0,0,511";
      else if (type<900) {
        method+=","+itos(doe8)+"ci1,1,1,1,2a";
        if (type&1) method+="w";
        method+="m";
      }
      else
        method+=","+itos(3+doe8)+"ci1";
    }
    // Slow CM with lots of models
    else {  // 5..9
      // Model text files
      method+=","+itos(doe8);
      if (type&1) method+="w2c0,1010,255i1";
      else method+="w1i1";
      method+="c256ci1,1,1,1,1,1,2a";
      // Analyze the data
      const int NR=1<<12;
      int pt[256]={0};  // position of last occurrence
      int r[NR]={0};    // count repetition gaps of length r
      const unsigned char* p=in->data();
      // level is 5..9 in this branch, so the scan below always runs.
      if (level>0) {
        for (unsigned i=0; i<n; ++i) {
          const int k=i-pt[p[i]];
          if (k>0 && k<NR) ++r[k];
          pt[p[i]]=i;
        }
      }
      // Add periodic models
      // At most two periods are added; each pass picks the gap length j>=5
      // with the highest score and then removes it from the histogram.
      int n1=n-r[1]-r[2]-r[3];
      for (int i=0; i<2; ++i) {
        int period=0;
        double score=0;
        int t=0;
        for (int j=5; j<NR && t<n1; ++j) {
          const double s=r[j]/(256.0+n1-t);
          if (s>score) score=s, period=j;
          t+=r[j];
        }
        if (period>4 && score>0.1) {
          method+="c0,0,"+itos(999+period)+",255i1";
          if (period<=255)
            method+="c0,"+itos(period)+"i1";
          n1-=r[period];
          r[period]=0;
        }
        else
          break;
      }
      method+="c0,2,0,255i1c0,3,0,0,255i1c0,4,0,0,0,255i1mm16ts19t0";
    }
  }
  // Compress
  std::string config;
  int args[9]={0};
  // makeConfig() turns the method string into a config and fills args[].
  config=makeConfig(method.c_str(), args);
  assert(n<=(0x100000u<<args[0])-4096);
  libzpaq::Compressor co;
  co.setOutput(out);
#ifdef DEBUG
  co.setVerify(true);
#endif
  StringBuffer pcomp_cmd;
  co.writeTag();
  co.startBlock(config.c_str(), args, &pcomp_cmd);
  // Segment comment: decimal input size, optionally followed by comment.
  std::string cs=itos(n);
  if (comment) cs=cs+" "+comment;
  co.startSegment(filename, cs.c_str());
  if (args[1]>=1 && args[1]<=7 && args[1]!=4) {  // LZ77 or BWT
    LZBuffer lz(*in, args);
    co.setInput(&lz);
    co.compress();
  }
  else {  // compress with e8e9 or no preprocessing
    // Values 1..3 and 5..7 were handled above, so only args[1]==4 can
    // satisfy this range test.
    if (args[1]>=4 && args[1]<=7)
      e8e9(in->data(), in->size());
    co.setInput(in);
    co.compress();
  }
#ifdef DEBUG  // verify pre-post processing are inverses
  int64_t outsize;
  const char* sha1result=co.endSegmentChecksum(&outsize, dosha1);
  assert(sha1result);
  assert(sha1ptr);
  if (memcmp(sha1result, sha1ptr, 20)!=0)
    error("Pre/post-processor test failed");
#else
  co.endSegment(sha1ptr);
#endif
  co.endBlock();
}
}  // end namespace libzpaq
/////// main source
using std::string;
using std::vector;
using std::map;
using libzpaq::StringBuffer;
int unz(const char * archive,const char * key); // paranoid unzpaq 2.06
typedef string (*voidhelpfunction)(bool i_usage,bool i_example);
typedef map<string, voidhelpfunction> 	MAPPAHELP;
typedef map<int, 	string> 			MAPPACOMMENTI;
typedef map<string, string> 			MAPPAFILEHASH;
typedef map<string, string> 			MAPPASTRINGASTRINGA;
typedef map<int64_t,string> 			MAPPAINT64STRING;
typedef map<string,int> 			MAPPASTRINGINTEGER;
typedef map<int,int> 				MAPPAINTINT;
/// Per-algorithm verification counters; every field starts at zero.
struct hash_check
{
	int		algotype;			// algorithm identifier
	int		checkedok;			// entries that matched
	int		checkedfailed;		// entries that did not match
	int		checkednotfound;	// entries that could not be located
	int64_t	checksize;			// accumulated size
	hash_check()
		: algotype(0),
		  checkedok(0),
		  checkedfailed(0),
		  checkednotfound(0),
		  checksize(0)
	{
	}
};
typedef map<string, hash_check> MAPPACHECK;
/// Pair of strings ("ok" and "calculated"), both empty after construction.
struct hash_autocheck
{
	string	ok;
	string	calculated;
	hash_autocheck(): ok(), calculated()
	{
	}
};
typedef map<string, hash_autocheck> MAPPAAUTOCHECK;
/// One CRC-32 record: a file name, a start, a size and the CRC-32 value.
/// The numeric fields are zero and the name is empty after construction.
struct s_crc32block
{
	string		filename;
	uint64_t	crc32start;
	uint64_t	crc32size;
	uint32_t	crc32;
	s_crc32block()
		: filename(),
		  crc32start(0),
		  crc32size(0),
		  crc32(0)
	{
	}
};
/// Error bucket: a counter, a text, and parallel lists of file names and
/// attributes. Starts with counter 0, empty text and empty lists.
struct s_error
{
	int				counter;
	string			text;
	vector<string>	filenames;
	vector<int32_t>	attrs;
	s_error(): counter(0), text()
	{
	}
};

typedef map<int,s_error> 	MAPPAERRORS;
/// Global variables
/// not in  for pthread that does not like class and methods
pthread_mutex_t g_mylock = PTHREAD_MUTEX_INITIALIZER;
vector<s_crc32block> 	g_crc32;
vector<uint64_t> 		g_arraybytescanned;
vector<uint64_t> 		g_arrayfilescanned;
MAPPAERRORS g_errors;

#ifdef _WIN32
///bool flagdd;
bool flagfindzpaq;
bool flagfixreserved;
bool flagimage;
bool flaglongpath;
bool flagopen;
int	 g_ConsoleCP;
int  g_ConsoleOutputCP;
#endif


bool 	flagbarraod;
bool 	flagbarraon;
bool 	flagbarraos;

vector<string>	g_addedchunklist;

int 	g_franzotype;
int		g_franzotypelen;
string 	g_optional;
string orderby;
vector<string> g_theorderby;
#ifdef	SERVER
bool	flagserver;
#endif
int 	g_ioBUFSIZE=1048576;

int		g_thechosenhash;
string	g_thechosenhash_str;

/// Closes the output and error handles when they are set, restores the
/// console colors and terminates the process with exit code 0.
void seppuku()
{
	if (g_output_handle)
	{
		fclose(g_output_handle);
	}
	if (g_error_handle)
	{
		fclose(g_error_handle);
	}

	color_restore();
	exit(0);
}

/// Reports whether the CPU advertises SSSE3, SSE4.1 and SHA together.
/// Always returns false when HWSHA2 is not defined at build time.
bool ihavehw()
{
#ifndef HWSHA2
	return false;
#endif
	uint32_t eax=0,ebx=0,ecx=0,edx=0;

	// leaf 0: EAX must be at least 7, because leaf 7 is queried below
	getcpuid(0,0,eax,ebx,ecx,edx);
	if (eax<7)
	{
		myprintf("00001! cpuid cannot get at least EAX 7\n");
		return false;
	}

	// leaf 1: ECX bit 9 and bit 19
	getcpuid(1,0,eax,ebx,ecx,edx);
	const bool supported_ssse3	=(ecx & (1UL << 9))!=0;
	const bool supported_sse41	=(ecx & (1UL << 19))!=0;
	if (flagdebug3)
		myprintf("00002: new ecx %d\n",(int)ecx);

	// leaf 7: EBX bit 29
	getcpuid(7,0,eax,ebx,ecx,edx);
	const bool supported_sha	=(ebx & (1UL << 29))!=0;
	if (flagdebug3)
		myprintf("00003: new ebx %d\n",(int)ebx);

	if (flagdebug)
	{
		myprintf("00004: SSSE3 :");
		if (!supported_ssse3)
			myprintf("NO\n");
		else
			myprintf("OK\n");

		myprintf("00005: SSE41 :");
		if (!supported_sse41)
			myprintf("NO\n");
		else
			myprintf("OK\n");

		myprintf("00006: SHA   :");
		if (!supported_sha)
			myprintf("NO\n");
		else
			myprintf("OK\n");
	}

	if (!supported_ssse3)
		return false;
	if (!supported_sse41)
		return false;
	return supported_sha;
}


/// Formats the current local time as "DD/MM/YYYY HH:MM:SS " (note the
/// trailing space). Prints it through myprintf when i_flagout is true and
/// returns it in every case. Exits the process if localtime() fails.
string print_datetime(bool i_flagout)
{
	time_t adesso;
	time(&adesso);
	const struct tm *tempo = localtime(&adesso);
	if (tempo==NULL)
	{
		myprintf("00007! guru on tm\n");
		exit(0);
	}

	char testo[40];
	snprintf(testo,sizeof(testo),"%02d/%02d/%d %02d:%02d:%02d ",
		tempo->tm_mday,
		tempo->tm_mon + 1,			// tm_mon is 0-based
		tempo->tm_year + 1900,		// tm_year counts from 1900
		tempo->tm_hour,
		tempo->tm_min,
		tempo->tm_sec);

	if (i_flagout)
		myprintf("%s",testo);
	return string(testo);
}

typedef map<string, bool*> 	MAPPAFLAGS;
typedef map<string, string> HELPFLAGS;






/*
	Section: hashers
*/

/// A name denotes a directory when it is non-empty and ends with '/'.
bool isdirectory(const string i_filename)
{
	return !i_filename.empty() && i_filename[i_filename.size()-1]=='/';
}

typedef string (*finalize_function)(void*); // type for conciseness

struct tipohash
{
	string 	hashname;
	string	switchname;
	string	hashdescription;
	string	franzocode;
	string	hasherror;
	int		hashlen;
	bool*	switchflag;
	bool	flagiszpaq;
	bool	flagisbenchmark;
	int		checkedok;
	int		checkedfailed;
	int		checkednotfound;
	int64_t checksize;

	finalize_function	ffinalize;

	tipohash(const string& i_hashname,int i_hashlen, const string& i_hashdescription,bool i_iszpaq,const string& i_switchname,bool* i_switchflag,finalize_function i_finalize,string i_franzcode): franzocode(""),switchflag(NULL),flagiszpaq(i_iszpaq),flagisbenchmark(false),
	checkedok(0),checkedfailed(0),checkednotfound(0),checksize(0)
	{
		hashname		=i_hashname;
		hashdescription	=i_hashdescription;
		switchname		=i_switchname;
		switchflag		=i_switchflag;
		if (i_finalize==NULL)
		{
			myprintf("00008! i_finalize NULL! %s\n",i_hashname.c_str());
		}
		
		ffinalize		=i_finalize;
		hashlen			=i_hashlen;
		franzocode		="";
		if (i_franzcode.size()==2)
			franzocode=std::string()+ i_franzcode[0]+i_franzcode[1];
		hasherror="!ERROR!";
		while (hasherror.size()<(unsigned int)i_hashlen)
			hasherror+=' ';

	}
	void resetcheckstat()
	{
		checkedok=0;
		checkedfailed=0;
		checkednotfound=0;
		checksize=0;
	}

};

typedef std::map<int, 	tipohash> MAPPATIPOHASH;

MAPPATIPOHASH g_mappatipohash;

/// Linear search of g_mappatipohash for the entry whose hashname equals
/// i_hashstring. Returns a pointer to it, or NULL when there is none.
tipohash* franz_get_hash(const string& i_hashstring)
{
	MAPPATIPOHASH::iterator entry=g_mappatipohash.begin();
	while (entry!=g_mappatipohash.end())
	{
		if (entry->second.hashname==i_hashstring)
			return &(entry->second);
		++entry;
	}
	return NULL;
}




/// Converts i_lunghezza bytes starting at i_risultato into an uppercase
/// hexadecimal string (two characters per byte). Returns an empty string
/// for a NULL pointer or a non-positive length.
string	binarytohex(const unsigned char* i_risultato,const int i_lunghezza)
{
	static const char cifre[]="0123456789ABCDEF";
	string esadecimale;
	if (i_risultato==NULL)
		return esadecimale;
	if (i_lunghezza<=0)
		return esadecimale;

	for (int k=0;k<i_lunghezza;++k)
	{
		const unsigned char ottetto=i_risultato[k];
		esadecimale.push_back(cifre[ottetto>>4]);	// high nibble
		esadecimale.push_back(cifre[ottetto&0x0F]);	// low nibble
	}
	return esadecimale;
}
// no to_string on old gcc
/// Decimal text of i_number, produced by myulltoa(i_number,-1).
string myto_string(int64_t i_number)
{
	const string testo(myulltoa(i_number,-1));
	return testo;
}

string decodealgoname(int franzotype)
{
	if (franzotype==FRANZO_NONE)
		return "";
	if (franzotype==FRANZO_CRC_32)
		return "CRC-32";

	MAPPATIPOHASH::iterator p=g_mappatipohash.find(franzotype);
	if (p==g_mappatipohash.end())
	{
		string temp="13882: franzotype strange "+myto_string(franzotype);
		perror(temp.c_str());
		return "";
	}
	if (!p->second.flagiszpaq)
	{
		string temp="13888: franzotype does not seems iszpaq "+myto_string(franzotype);
		perror(temp.c_str());
		return "";
	}
	return p->second.hashname;
}

string decodefranzoffset(int franzotype)
{
	if (franzotype==FRANZO_NONE)
		return "NOTHING (LIKE 7.15)";
	if (franzotype==FRANZO_CRC_32)
		return "CRC-32";

	MAPPATIPOHASH::iterator p=g_mappatipohash.find(franzotype);
	if (p==g_mappatipohash.end())
	{
		string temp="16839: franzotype strange "+myto_string(franzotype);
		perror(temp.c_str());
		return "";
	}
	if (!p->second.flagiszpaq)
	{
		string temp="09603: franzotype does not seems iszpaq "+myto_string(franzotype);
		perror(temp.c_str());
		return "";
	}

	return p->second.hashname+"+CRC-32";
}

/// Returns the value this program associates with "no data" for the hash
/// named i_string. Names that are not listed get the SHA-1 value.
string emptyalgo(const string& i_string)
{
	// values shared by more than one algorithm name
	static const char* const k_blake3	="AF1349B9F5F9A1A6A0404DEA36DCC9499BCB25C9ADC112B7CC9A93CAE41F3262";
	static const char* const k_xxhash64	="EF46DB3751D8E999";
	static const char* const k_zero32	="00000000";
	static const char* const k_xxh3		="99AA06D3014798D86001C324468D497F";
	static const char* const k_sha256	="E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855";
	static const char* const k_md5		="D41D8CD98F00B204E9800998ECF8427E";
	static const char* const k_zero64	="0000000000000000";
	static const char* const k_sha3		="A7FFC6F8BF1ED76651C14756A061D662F580FF4DE43B49FA82D80A4B80F8434A";

	struct known_entry
	{
		const char*	name;
		const char*	value;
	};
	static const known_entry known[]=
	{
		{"BLAKE3",		k_blake3},
		{"BLAKE3B",		k_blake3},
		{"QUICK",		k_xxhash64},
		{"XXHASH64",	k_xxhash64},
		{"WINHASH64",	k_xxhash64},
		{"XXHASH64B",	k_xxhash64},
		{"WYHASH",		""},
		{"CRC-32",		k_zero32},
		{"ENTROPY",		k_zero32},
		{"CRC-32C",		k_zero32},
		{"XXH3",		k_xxh3},
		{"XXH3B",		k_xxh3},
		{"SHA-256",		k_sha256},
		{"SHA-256B",	k_sha256},
		{"WHIRLPOOL",	"19FA61D75522A4669B44E39C1D2E1726C530232130D407F89AFEE0964997F7A73E83BE698B288FEBCF88E3E03C4F0757EA8964E59B63D93708B138CC42A66EB3"},
		{"MD5",			k_md5},
		{"MD5B",		k_md5},
		{"NILSIMSA",	"VUOTO"},
		{"HIGHWAY64",	"92943A34E3447FAA"},
		{"HIGHWAY128",	"D39FAE4F2222AD3611D8E8215B1B01F2"},
		{"HIGHWAY256",	"D6EB09DBE820BC5A8050209986CEDE586E78E6815864E99DC22811C2FCFE001B"},
		{"ZETA",		k_zero64},
		{"ZETAENC",		k_zero64},
		{"SHA-3",		k_sha3},
		{"SHA-3B",		k_sha3}
	};

	const unsigned int total=sizeof(known)/sizeof(known[0]);
	for (unsigned int k=0;k<total;k++)
		if (i_string==known[k].name)
			return known[k].value;

	return "DA39A3EE5E6B4B0D3255BFEF95601890AFD80709"; //sha1
}

/// LICENSE_START.8

/*
	Experimental "antihash" function: sort "similar" files
*/
/*!
 * The MIT License (MIT)
 * =====================
 *
 * Copyright 2017 Sepehr Laal
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
*/
/*!
 * @fn nilsimsa_compute
 * @brief computes the Nilsimsa hash of the given data.
 * @arg data [IN] input data byte (char) array
 * @arg size [IN] input data size
 * @arg out [OUT] output hash string (must be allocated
 * to hold exactly 64 characters + 1 null character. So
 * total of 65 characters e.g. char hash[65])
 */
void nilsimsa_compute(const char* data, int size, char* out);
/*!
 * @fn nilsimsa_compare
 * @brief compares two given Nilsimsa hashes
 * @returns A score between -127 and 128 where -127 means
 * completely uncorrelated data and 128 means same data.
 * @arg lhs [IN] left hand side hash
 * @arg rhs [IN] right hand side hash
 */
static int __tran3(int a, int b, int c, int n);
static const char* __int_to_hexchar(int num);
#define ACCUM_LENGTH 256
#define DIGEST_LENGTH 32
#define WINDOW_LENGTH 4
void nilsimsa_compute(const char* data, int n, char* out)
{
    int accum[ACCUM_LENGTH] = { 0 };
    int digest[DIGEST_LENGTH] = { 0 };
    int window[WINDOW_LENGTH] = { -1, -1, -1, -1 };
    for (int i = 0; i < n; ++i)
    {
        int ch = data[i] & 0xFF;
        if (window[1] > -1)
        {
            accum[__tran3(ch, window[0], window[1], 0)] += 1;
        }
        if (window[2] > -1)
        {
            accum[__tran3(ch, window[0], window[2], 1)] += 1;
            accum[__tran3(ch, window[1], window[2], 2)] += 1;
        }
        if (window[3] > -1)
        {
            accum[__tran3(ch, window[0], window[3], 3)] += 1;
            accum[__tran3(ch, window[1], window[3], 4)] += 1;
            accum[__tran3(ch, window[2], window[3], 5)] += 1;
            accum[__tran3(window[3], window[0], ch, 6)] += 1;
            accum[__tran3(window[3], window[2], ch, 7)] += 1;
        }
        window[3] = window[2];
        window[2] = window[1];
        window[1] = window[0];
        window[0] = ch;
    }
    int total = 0;
    if (n == 3)
        total = 1;
    else if (n == 4)
        total = 4;
    else if (n > 4)
        total = 8 * n - 28;
    int threshold = total / ACCUM_LENGTH;
    for (int i = 0; i < ACCUM_LENGTH; i++)
        if (accum[i] > threshold)
            digest[i >> 3] += 1 << (i & 7);
    int rev_index;
    for (int i = 0; i < DIGEST_LENGTH; ++i)
    {
        rev_index = DIGEST_LENGTH - i - 1;
        out[2*i  ] = __int_to_hexchar(digest[rev_index])[0];
        out[2*i+1] = __int_to_hexchar(digest[rev_index])[1];
    }
    out[2 * DIGEST_LENGTH] = '\0';
}
// This is a precomputed constant for the standard Nilsimsa "53"-based transition table.
// 256 entries (16 rows of 16). __tran3() indexes it with values already
// reduced to 0..255 by its callers or by "& 255".
static const int __TRAN53[] =
{
    0x02, 0xD6, 0x9E, 0x6F, 0xF9, 0x1D, 0x04, 0xAB, 0xD0, 0x22, 0x16, 0x1F, 0xD8, 0x73, 0xA1, 0xAC,
    0x3B, 0x70, 0x62, 0x96, 0x1E, 0x6E, 0x8F, 0x39, 0x9D, 0x05, 0x14, 0x4A, 0xA6, 0xBE, 0xAE, 0x0E,
    0xCF, 0xB9, 0x9C, 0x9A, 0xC7, 0x68, 0x13, 0xE1, 0x2D, 0xA4, 0xEB, 0x51, 0x8D, 0x64, 0x6B, 0x50,
    0x23, 0x80, 0x03, 0x41, 0xEC, 0xBB, 0x71, 0xCC, 0x7A, 0x86, 0x7F, 0x98, 0xF2, 0x36, 0x5E, 0xEE,
    0x8E, 0xCE, 0x4F, 0xB8, 0x32, 0xB6, 0x5F, 0x59, 0xDC, 0x1B, 0x31, 0x4C, 0x7B, 0xF0, 0x63, 0x01,
    0x6C, 0xBA, 0x07, 0xE8, 0x12, 0x77, 0x49, 0x3C, 0xDA, 0x46, 0xFE, 0x2F, 0x79, 0x1C, 0x9B, 0x30,
    0xE3, 0x00, 0x06, 0x7E, 0x2E, 0x0F, 0x38, 0x33, 0x21, 0xAD, 0xA5, 0x54, 0xCA, 0xA7, 0x29, 0xFC,
    0x5A, 0x47, 0x69, 0x7D, 0xC5, 0x95, 0xB5, 0xF4, 0x0B, 0x90, 0xA3, 0x81, 0x6D, 0x25, 0x55, 0x35,
    0xF5, 0x75, 0x74, 0x0A, 0x26, 0xBF, 0x19, 0x5C, 0x1A, 0xC6, 0xFF, 0x99, 0x5D, 0x84, 0xAA, 0x66,
    0x3E, 0xAF, 0x78, 0xB3, 0x20, 0x43, 0xC1, 0xED, 0x24, 0xEA, 0xE6, 0x3F, 0x18, 0xF3, 0xA0, 0x42,
    0x57, 0x08, 0x53, 0x60, 0xC3, 0xC0, 0x83, 0x40, 0x82, 0xD7, 0x09, 0xBD, 0x44, 0x2A, 0x67, 0xA8,
    0x93, 0xE0, 0xC2, 0x56, 0x9F, 0xD9, 0xDD, 0x85, 0x15, 0xB4, 0x8A, 0x27, 0x28, 0x92, 0x76, 0xDE,
    0xEF, 0xF8, 0xB2, 0xB7, 0xC9, 0x3D, 0x45, 0x94, 0x4B, 0x11, 0x0D, 0x65, 0xD5, 0x34, 0x8B, 0x91,
    0x0C, 0xFA, 0x87, 0xE9, 0x7C, 0x5B, 0xB1, 0x4D, 0xE5, 0xD4, 0xCB, 0x10, 0xA2, 0x17, 0x89, 0xBC,
    0xDB, 0xB0, 0xE2, 0x97, 0x88, 0x52, 0xF7, 0x48, 0xD3, 0x61, 0x2C, 0x3A, 0x2B, 0xD1, 0x8C, 0xFB,
    0xF1, 0xCD, 0xE4, 0x6A, 0xE7, 0xA9, 0xFD, 0xC4, 0x37, 0xC8, 0xD2, 0xF6, 0xDF, 0x58, 0x72, 0x4E
};
/*
* Used to convert integers to 1-byte (two-character) hex strings.
* Valid for integers in the range 0..255 inclusive.
*/
// Entry i is the two-character uppercase hexadecimal spelling of i;
// the table has 256 entries, "00" through "FF".
static const char* __HEX_BYTE_DIGITS[] =
{
    "00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "0A", "0B", "0C", "0D", "0E", "0F",
    "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "1A", "1B", "1C", "1D", "1E", "1F",
    "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "2A", "2B", "2C", "2D", "2E", "2F",
    "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "3A", "3B", "3C", "3D", "3E", "3F",
    "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "4A", "4B", "4C", "4D", "4E", "4F",
    "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "5A", "5B", "5C", "5D", "5E", "5F",
    "60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "6A", "6B", "6C", "6D", "6E", "6F",
    "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "7A", "7B", "7C", "7D", "7E", "7F",
    "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "8A", "8B", "8C", "8D", "8E", "8F",
    "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "9A", "9B", "9C", "9D", "9E", "9F",
    "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "AA", "AB", "AC", "AD", "AE", "AF",
    "B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9", "BA", "BB", "BC", "BD", "BE", "BF",
    "C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CA", "CB", "CC", "CD", "CE", "CF",
    "D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "DA", "DB", "DC", "DD", "DE", "DF",
    "E0", "E1", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "EA", "EB", "EC", "ED", "EE", "EF",
    "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "FA", "FB", "FC", "FD", "FE", "FF"
};
/* Maps three byte values and a small index n to a bucket number 0..255
   through the __TRAN53 table. */
static int __tran3(int a, int b, int c, int n)
{
    const int odd_factor = n + n + 1;
    const int mixed = __TRAN53[(a + n) & 255] ^ (__TRAN53[b] * odd_factor);
    const int salted = __TRAN53[c ^ __TRAN53[n]];
    return (mixed + salted) & 255;
}
/* Returns the two-character hex string stored at index num of
   __HEX_BYTE_DIGITS; no range check is performed on num. */
static const char* __int_to_hexchar(int num)
{
    const char* two_digits = __HEX_BYTE_DIGITS[num];
    return two_digits;
}
/// LICENSE_END.8



/// LICENSE_START.18
/// This is a (minor) rework of https://github.com/google/highwayhash/tree/master/c
/*
By Jan Wassenberg jan.wassenberg@gmail.com and Jyrki Alakuijala jyrki.alakuijala@gmail.com

This is not an official Google product.
*/


/*////////////////////////////////////////////////////////////////////////////*/
/* Low-level API, use for implementing streams etc...                         */
/*////////////////////////////////////////////////////////////////////////////*/

/* Hash state: four 64-bit lanes in each of v0, v1, mul0 and mul1.
   HighwayHashReset() loads mul0/mul1 with fixed constants and v0/v1 with
   those constants XORed with the key (v1 uses the key words with their
   32-bit halves swapped). */
typedef struct {
  uint64_t v0[4];
  uint64_t v1[4];
  uint64_t mul0[4];
  uint64_t mul1[4];
} HighwayHashState;

/* Initializes state with given key */
static void HighwayHashReset(const uint64_t key[4], HighwayHashState* state);
/* Takes a packet of 32 bytes */
void HighwayHashUpdatePacket(const uint8_t* packet, HighwayHashState* state);
/* Adds the final 1..31 bytes, do not use if 0 remain */
void HighwayHashUpdateRemainder(const uint8_t* bytes, const size_t size_mod32,
                                HighwayHashState* state);
/* Compute final hash value. Makes state invalid. */
static uint64_t HighwayHashFinalize64(HighwayHashState* state);
static void HighwayHashFinalize128(HighwayHashState* state, uint64_t hash[2]);
static void HighwayHashFinalize256(HighwayHashState* state, uint64_t hash[4]);

/*////////////////////////////////////////////////////////////////////////////*/
/* Non-cat API: single call on full data                                      */
/*////////////////////////////////////////////////////////////////////////////*/

/*////////////////////////////////////////////////////////////////////////////*/
/* Cat API: allows appending with multiple calls                              */
/*////////////////////////////////////////////////////////////////////////////*/

/* State for the append-style ("Cat") API. */
typedef struct {
  HighwayHashState state;  /* underlying hash state */
  uint8_t packet[32];      /* 32-byte buffer, the size HighwayHashUpdatePacket reads */
  int num;                 /* NOTE(review): presumably the number of bytes currently
                              held in packet - confirm in HighwayHashCatAppend */
} HighwayHashCat;

/* Allocates new state for a new streaming hash computation */
void HighwayHashCatStart(const uint64_t key[4], HighwayHashCat* state);

void HighwayHashCatAppend(const uint8_t* bytes, size_t num,
                          HighwayHashCat* state);

/* Computes final hash value */
uint64_t HighwayHashCatFinish64(const HighwayHashCat* state);
void HighwayHashCatFinish128(const HighwayHashCat* state, uint64_t hash[2]);
void HighwayHashCatFinish256(const HighwayHashCat* state, uint64_t hash[4]);



/*
This code is compatible with C90 with the additional requirement of
supporting uint64_t.
*/

/*////////////////////////////////////////////////////////////////////////////*/
/* Internal implementation                                                    */
/*////////////////////////////////////////////////////////////////////////////*/

/// this is a weird "fix" for gcc bug https://github.com/fcorbelli/zpaqfranz/issues/71
/// please don't ask anything, took a couple of hours

void fixgcc(const char* fmt, ...)
{
   /* Deliberately a no-op: the variadic argument list is opened and closed
      without reading any argument. */
   va_list args;
   va_start(args, fmt);
   va_end(args);
}

/* Initializes *state from the four 64-bit key words:
   - mul0 and mul1 receive fixed constants;
   - v0[i] = mul0[i] XOR key[i];
   - v1[i] = mul1[i] XOR key[i] with its 32-bit halves swapped. */
void HighwayHashReset(const uint64_t key[4], HighwayHashState* state) {
  state->mul0[0] = 0xdbe6d5d5fe4cce2full;
  state->mul0[1] = 0xa4093822299f31d0ull;
  state->mul0[2] = 0x13198a2e03707344ull;
  state->mul0[3] = 0x243f6a8885a308d3ull;
  state->mul1[0] = 0x3bd39e10cb0ef593ull;
  state->mul1[1] = 0xc0acf169b5f18a8cull;
  state->mul1[2] = 0xbe5466cf34e90c6cull;
  state->mul1[3] = 0x452821e638d01377ull;
  state->v0[0] = state->mul0[0] ^ key[0];
  state->v0[1] = state->mul0[1] ^ key[1];
  state->v0[2] = state->mul0[2] ^ key[2];
  state->v0[3] = state->mul0[3] ^ key[3];
  state->v1[0] = state->mul1[0] ^ ((key[0] >> 32) | (key[0] << 32));
  state->v1[1] = state->mul1[1] ^ ((key[1] >> 32) | (key[1] << 32));
  state->v1[2] = state->mul1[2] ^ ((key[2] >> 32) | (key[2] << 32));
  /* No-op call kept on purpose as a gcc workaround (zpaqfranz issue 71);
     do not move or remove it. */
	fixgcc("ciao");
  state->v1[3] = state->mul1[3] ^ ((key[3] >> 32) | (key[3] << 32));
}

/* Adds to *add0 and *add1 two 64-bit words built by rearranging the bytes
   of v0 and v1: every OR-ed term below masks one or two bytes out of v0/v1
   and shifts them to a different byte position. The additions wrap modulo
   2^64 (unsigned arithmetic). */
static void ZipperMergeAndAdd(const uint64_t v1, const uint64_t v0,
                              uint64_t* add1, uint64_t* add0) {
  *add0 += (((v0 & 0xff000000ull) | (v1 & 0xff00000000ull)) >> 24) |
           (((v0 & 0xff0000000000ull) | (v1 & 0xff000000000000ull)) >> 16) |
           (v0 & 0xff0000ull) | ((v0 & 0xff00ull) << 32) |
           ((v1 & 0xff00000000000000ull) >> 8) | (v0 << 56);
  *add1 += (((v1 & 0xff000000ull) | (v0 & 0xff00000000ull)) >> 24) |
           (v1 & 0xff0000ull) | ((v1 & 0xff0000000000ull) >> 16) |
           ((v1 & 0xff00ull) << 24) | ((v0 & 0xff000000000000ull) >> 8) |
           ((v1 & 0xffull) << 48) | (v0 & 0xff00000000000000ull);
}

/* Mixes four 64-bit input lanes into the state.
   Each lane is processed independently first (add, then a 32x32-bit multiply
   folded in by XOR), after which neighbouring lanes are combined with
   ZipperMergeAndAdd: v1 into v0, then the updated v0 into v1. */
static void Update(const uint64_t lanes[4], HighwayHashState* state) {
  uint64_t* const v0 = state->v0;
  uint64_t* const v1 = state->v1;
  uint64_t* const mul0 = state->mul0;
  uint64_t* const mul1 = state->mul1;
  int lane = 0;

  while (lane < 4) {
    v1[lane] += mul0[lane] + lanes[lane];
    mul0[lane] ^= (v1[lane] & 0xffffffff) * (v0[lane] >> 32);
    v0[lane] += mul1[lane];
    mul1[lane] ^= (v0[lane] & 0xffffffff) * (v1[lane] >> 32);
    ++lane;
  }

  ZipperMergeAndAdd(v1[1], v1[0], &v0[1], &v0[0]);
  ZipperMergeAndAdd(v1[3], v1[2], &v0[3], &v0[2]);
  ZipperMergeAndAdd(v0[1], v0[0], &v1[1], &v1[0]);
  ZipperMergeAndAdd(v0[3], v0[2], &v1[3], &v1[2]);
}

/* Assembles 8 bytes at src into a 64-bit value, least significant byte
   first, independent of host byte order. */
static uint64_t Read64(const uint8_t* src) {
  uint64_t value = 0;
  int k;
  for (k = 7; k >= 0; --k) {
    value = (value << 8) | (uint64_t)src[k];
  }
  return value;
}

/* Consumes one 32-byte packet: decodes it into four 64-bit lanes with
   Read64 and feeds them to Update. */
void HighwayHashUpdatePacket(const uint8_t* packet, HighwayHashState* state) {
  uint64_t lanes[4];
  int k;
  for (k = 0; k < 4; ++k) {
    lanes[k] = Read64(packet + 8 * k);
  }
  Update(lanes, state);
}

/* Rotates both 32-bit halves of each of the four lanes left by count bits.
   The count is reduced modulo 32. A resulting rotation of 0 leaves the
   lanes unchanged; this case is handled separately because a rotation of 0
   would otherwise need a right shift by 32, which is undefined behaviour
   for a 32-bit operand. Counts 1..31 behave exactly as before. */
static void Rotate32By(uint64_t count, uint64_t lanes[4]) {
  const unsigned shift = (unsigned)(count & 31u);
  int i;
  if (shift == 0) {
    return;
  }
  for (i = 0; i < 4; ++i) {
    const uint32_t half0 = (uint32_t)(lanes[i] & 0xffffffff);
    const uint32_t half1 = (uint32_t)(lanes[i] >> 32);
    const uint32_t rot0 = (half0 << shift) | (half0 >> (32 - shift));
    const uint32_t rot1 = (half1 << shift) | (half1 >> (32 - shift));
    lanes[i] = (uint64_t)rot0 | ((uint64_t)rot1 << 32);
  }
}

/* Absorbs the final partial packet.
   bytes:      the trailing input bytes.
   size_mod32: number of trailing bytes; the 32-byte scratch packet is only
               large enough for values below 32 (callers in this file pass a
               non-zero pending count).
   state:      updated in place.
   The length is first injected into v0 and used to rotate v1, then the bytes
   are packed into a zero-filled 32-byte packet: all whole 4-byte groups are
   copied to the front; the leftover 1..3 bytes are folded either into the last
   four packet bytes (when bit 4 of the length is set) or into bytes 16..18. */
void HighwayHashUpdateRemainder(const uint8_t* bytes, const size_t size_mod32,
                                HighwayHashState* state) {
  const size_t tail_len = size_mod32 & 3;
  const size_t whole_len = size_mod32 - tail_len;
  const uint8_t* tail = bytes + whole_len;
  const uint64_t length_mix = ((uint64_t)size_mod32 << 32) + size_mod32;
  uint8_t packet[32] = {0};
  size_t k;

  for (k = 0; k < 4; ++k) {
    state->v0[k] += length_mix;
  }
  Rotate32By(size_mod32, state->v1);

  for (k = 0; k < whole_len; ++k) {
    packet[k] = bytes[k];
  }

  if (size_mod32 & 16) {
    /* Last four input bytes (may overlap bytes already copied above). */
    const uint8_t* last4 = tail + tail_len - 4;
    for (k = 0; k < 4; ++k) {
      packet[28 + k] = last4[k];
    }
  } else if (tail_len) {
    /* First, middle and last leftover byte; duplicates are intentional when
       fewer than three bytes remain. */
    packet[16 + 0] = tail[0];
    packet[16 + 1] = tail[tail_len >> 1];
    packet[16 + 2] = tail[tail_len - 1];
  }
  HighwayHashUpdatePacket(packet, state);
}

/* Writes a permuted copy of v into permuted: lane pairs (0,1) and (2,3) trade
   places and the 32-bit halves of every lane are swapped.
   permuted[i] = halves-swapped v[(i + 2) mod 4]. */
static void Permute(const uint64_t v[4], uint64_t* permuted) {
  int dst;
  for (dst = 0; dst < 4; ++dst) {
    const uint64_t src = v[(dst + 2) & 3];
    permuted[dst] = (src >> 32) | (src << 32);
  }
}

/* One finalization round: feeds a permuted copy of the state's own v0 lanes
   back into the state through Update(). */
void PermuteAndUpdate(HighwayHashState* state) {
  uint64_t shuffled[4];
  Permute(state->v0, shuffled);
  Update(shuffled, state);
}

/* Folds the 256-bit value (a3:a2:a1:a0) down to 128 bits (*m1:*m0).
   The top two bits of a3 are discarded; the remaining upper 128 bits (a3:a2)
   are shifted left by 1 and by 2 as a 128-bit quantity and both shifted
   copies are XOR-ed into the lower 128 bits (a1:a0). */
static void ModularReduction(uint64_t a3_unmasked, uint64_t a2, uint64_t a1,
                             uint64_t a0, uint64_t* m1, uint64_t* m0) {
  const uint64_t a3 = a3_unmasked & 0x3FFFFFFFFFFFFFFFull;
  /* High words of (a3:a2) << 1 and (a3:a2) << 2. */
  const uint64_t high_shl1 = (a3 << 1) | (a2 >> 63);
  const uint64_t high_shl2 = (a3 << 2) | (a2 >> 62);
  /* Low words of the same two shifts. */
  const uint64_t low_shl1 = a2 << 1;
  const uint64_t low_shl2 = a2 << 2;
  *m1 = a1 ^ high_shl1 ^ high_shl2;
  *m0 = a0 ^ low_shl1 ^ low_shl2;
}

/* Produces the 64-bit digest: runs 4 permute-and-update rounds on state
   (modifying it) and returns the sum of lane 0 of v0, v1, mul0 and mul1. */
static uint64_t HighwayHashFinalize64(HighwayHashState* state) {
  int rounds_left;
  for (rounds_left = 4; rounds_left > 0; --rounds_left) {
    PermuteAndUpdate(state);
  }
  return state->v0[0] + state->v1[0] + state->mul0[0] + state->mul1[0];
}

/* Produces the 128-bit digest: runs 6 permute-and-update rounds on state
   (modifying it), then hash[k] combines lane k of v0/mul0 with lane k+2 of
   v1/mul1, for k = 0 and 1. */
static void HighwayHashFinalize128(HighwayHashState* state, uint64_t hash[2]) {
  int rounds_left;
  int k;

  for (rounds_left = 6; rounds_left > 0; --rounds_left) {
    PermuteAndUpdate(state);
  }

  for (k = 0; k < 2; ++k) {
    hash[k] = state->v0[k] + state->mul0[k] + state->v1[k + 2] +
              state->mul1[k + 2];
  }
}

/* Produces the 256-bit digest: runs 10 permute-and-update rounds on state
   (modifying it), then reduces each half of the state (lanes 0-1 and lanes
   2-3) to 128 bits with ModularReduction, filling hash[0..1] and hash[2..3]. */
static void HighwayHashFinalize256(HighwayHashState* state, uint64_t hash[4]) {
  int rounds_left;
  int base;
  /* 256-bit hashing is expected to be used mostly with long messages, since
     storing and using a 256-bit hash (compared with 128-bit) already carries a
     larger constant cost. The extra rounds therefore barely change the
     per-byte cost of long messages. */
  for (rounds_left = 10; rounds_left > 0; --rounds_left) {
    PermuteAndUpdate(state);
  }
  /* base = 0 handles lanes 0-1, base = 2 handles lanes 2-3. */
  for (base = 0; base < 4; base += 2) {
    ModularReduction(state->v1[base + 1] + state->mul1[base + 1],
                     state->v1[base] + state->mul1[base],
                     state->v0[base + 1] + state->mul0[base + 1],
                     state->v0[base] + state->mul0[base],
                     &hash[base + 1], &hash[base]);
  }
}



/*////////////////////////////////////////////////////////////////////////////*/
/* Cat API: allows appending with multiple calls                              */
/*////////////////////////////////////////////////////////////////////////////*/

/* Begins an incremental hash: marks the pending-byte buffer as empty and
   resets the embedded hash state with the given 4x64-bit key. */
void HighwayHashCatStart(const uint64_t key[4], HighwayHashCat* state) {
  state->num = 0;
  HighwayHashReset(key, &state->state);
}

/* Appends num bytes to an incremental hash.
   Bytes left over from earlier calls are completed to a full 32-byte packet
   first; then whole packets are hashed straight from the input; whatever is
   left (fewer than 32 bytes) is stored in state->packet for the next call or
   for one of the Finish functions. */
void HighwayHashCatAppend(const uint8_t* bytes, size_t num,
                          HighwayHashCat* state) {
  size_t k;

  /* Step 1: top up a partially filled packet. */
  if (state->num != 0) {
    const size_t room = 32u - state->num;
    const size_t take = (num > room) ? room : num;
    for (k = 0; k < take; k++) {
      state->packet[state->num + k] = bytes[k];
    }
    state->num += take;
    bytes += take;
    num -= take;
    if (state->num == 32) {
      HighwayHashUpdatePacket(state->packet, &state->state);
      state->num = 0;
    }
  }

  /* Step 2: hash whole packets directly from the caller's buffer. */
  for (; num >= 32; num -= 32, bytes += 32) {
    HighwayHashUpdatePacket(bytes, &state->state);
  }

  /* Step 3: stash the tail. If step 1 left the packet partially filled, num
     is already 0 here and nothing is copied. */
  for (k = 0; k < num; k++) {
    state->packet[state->num++] = bytes[k];
  }
}

uint64_t HighwayHashCatFinish64(const HighwayHashCat* state) {
  HighwayHashState copy = state->state;
  if (state->num) {
    HighwayHashUpdateRemainder(state->packet, state->num, &copy);
  }
  return HighwayHashFinalize64(&copy);
}

void HighwayHashCatFinish128(const HighwayHashCat* state, uint64_t hash[2]) {
  HighwayHashState copy = state->state;
  if (state->num) {
    HighwayHashUpdateRemainder(state->packet, state->num, &copy);
  }
  HighwayHashFinalize128(&copy, hash);
}

void HighwayHashCatFinish256(const HighwayHashCat* state, uint64_t hash[4]) {
  HighwayHashState copy = state->state;
  if (state->num) {
    HighwayHashUpdateRemainder(state->packet, state->num, &copy);
  }
  HighwayHashFinalize256(&copy, hash);
}
/// LICENSE_END.18



/// LICENSE_START.11
// //////////////////////////////////////////////////////////
// sha3.h
// Copyright (c) 2014,2015 Stephan Brumme. All rights reserved.
// see http://create.stephan-brumme.com/disclaimer.html
//
/// Incremental SHA3 hasher.
/// Usage: construct with the wanted digest width, call add() any number of
/// times, then getHash() to obtain the digest as an uppercase hex string.
class SHA3
{
public:
  /// supported digest widths; each enumerator's value is the width in bits
  enum Bits { Bits224 = 224, Bits256 = 256, Bits384 = 384, Bits512 = 512 };
  /// choose the digest width (256 bits by default) and start with an empty state
  explicit SHA3(Bits bits = Bits256);
  /// feed numBytes bytes starting at data; may be called repeatedly
  void add(const void* data, size_t numBytes);
  /// digest of everything added so far, as uppercase hex; the hash state is
  /// restored afterwards so more data can still be added
  std::string getHash();
  /// clear the hash state and byte counters (the digest width is kept)
  void reset();
private:
  /// absorb one block of m_blockSize bytes and run the permutation
  void processBlock(const void* data);
  /// pad the pending bytes in m_buffer and absorb them as the final block
  void processBuffer();
  /// 1600 bits, stored as 25x64 bit, BlockSize is no more than 1152 bits (Keccak224)
  enum { StateSize    = 1600 / (8 * 8),
         MaxBlockSize =  200 - 2 * (224 / 8) };
  /// hash state (StateSize 64-bit words)
  uint64_t m_hash[StateSize];
  /// size of processed data in bytes (full blocks only, pending bytes excluded)
  uint64_t m_numBytes;
  /// block size in bytes (less or equal to MaxBlockSize), derived from the digest width
  size_t   m_blockSize;
  /// valid bytes in m_buffer
  size_t   m_bufferSize;
  /// bytes not processed yet
  uint8_t  m_buffer[MaxBlockSize];
  /// variant (digest width in bits)
  Bits     m_bits;
};
/// Build a hasher for the requested digest width.
/// The block size in bytes is 200 minus twice the digest size in bytes.
SHA3::SHA3(Bits bits)
: m_blockSize(200 - 2 * (bits / 8)), m_bits(bits)
{
  // start from an all-zero state with nothing buffered
  reset();
}
/// Return to the initial state: zero hash words, no bytes counted, empty buffer.
/// The digest width and block size chosen at construction are left untouched.
void SHA3::reset()
{
  m_bufferSize = 0;
  m_numBytes   = 0;
  for (uint64_t* word = m_hash; word != m_hash + StateSize; ++word)
    *word = 0;
}
namespace
{
  /// number of permutation rounds executed per block
  const unsigned int Rounds = 24;
  /// per-round constant XOR-ed into the first state word at the end of a round
  const uint64_t XorMasks[Rounds] =
  {
    0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL,
    0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL,
    0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL,
    0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL,
    0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL,
    0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL,
    0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL,
    0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
  };
  /// rotate a 64-bit value left by numBits; callers pass 1..63
  inline uint64_t rotateLeft(uint64_t x, uint8_t numBits)
  {
    const uint64_t high = x << numBits;
    const uint64_t low  = x >> (64 - numBits);
    return high | low;
  }
  /// convert little vs big endian (only compiled for big-endian builds)
#ifdef BIG
  inline uint64_t swap(uint64_t x)
  {
#ifdef ANCIENT
    // portable byte reversal for compilers without the bswap builtin
    return  (x >> 56) |
           ((x >> 40) & 0x000000000000FF00ULL) |
           ((x >> 24) & 0x0000000000FF0000ULL) |
           ((x >>  8) & 0x00000000FF000000ULL) |
           ((x <<  8) & 0x000000FF00000000ULL) |
           ((x << 24) & 0x0000FF0000000000ULL) |
           ((x << 40) & 0x00FF000000000000ULL) |
            (x << 56);
#else
    return __builtin_bswap64(x);
#endif
  }
#endif
  /// x modulo 5, valid only for x in 0..9 (a single conditional subtraction)
  unsigned int mod5(unsigned int x)
  {
    return (x < 5) ? x : x - 5;
  }
}
/// Absorb one block and run the 24-round Keccak permutation on m_hash.
/// data must point to at least m_blockSize readable bytes; it may have any
/// alignment (each 64-bit word is fetched with memcpy rather than through a
/// casted pointer, so unaligned caller buffers are safe).
void SHA3::processBlock(const void* data)
{
#ifdef BIG
#define MYLITTLEENDIAN2(x) swap(x)
#else
#define MYLITTLEENDIAN2(x) (x)
#endif
  // mix the block into the state, interpreting the bytes as little-endian words
  const uint8_t* bytes = (const uint8_t*) data;
  for (unsigned int i = 0; i < m_blockSize / 8; i++)
  {
    uint64_t word;
    memcpy(&word, bytes + 8 * i, sizeof(word));
    m_hash[i] ^= MYLITTLEENDIAN2(word);
  }
  for (unsigned int round = 0; round < Rounds; round++)
  {
    // Theta: column parities, then fold neighbouring parities into every word
    uint64_t coefficients[5];
    for (unsigned int i = 0; i < 5; i++)
      coefficients[i] = m_hash[i] ^ m_hash[i + 5] ^ m_hash[i + 10] ^ m_hash[i + 15] ^ m_hash[i + 20];
    for (unsigned int i = 0; i < 5; i++)
    {
      uint64_t one = coefficients[mod5(i + 4)] ^ rotateLeft(coefficients[mod5(i + 1)], 1);
      m_hash[i     ] ^= one;
      m_hash[i +  5] ^= one;
      m_hash[i + 10] ^= one;
      m_hash[i + 15] ^= one;
      m_hash[i + 20] ^= one;
    }
    // Rho + Pi: each word is rotated and moved to the next slot of a fixed
    // 24-step cycle that starts and ends at m_hash[1]; statement order matters
    uint64_t one;
    uint64_t last = m_hash[1];
    one = m_hash[10]; m_hash[10] = rotateLeft(last,  1); last = one;
    one = m_hash[ 7]; m_hash[ 7] = rotateLeft(last,  3); last = one;
    one = m_hash[11]; m_hash[11] = rotateLeft(last,  6); last = one;
    one = m_hash[17]; m_hash[17] = rotateLeft(last, 10); last = one;
    one = m_hash[18]; m_hash[18] = rotateLeft(last, 15); last = one;
    one = m_hash[ 3]; m_hash[ 3] = rotateLeft(last, 21); last = one;
    one = m_hash[ 5]; m_hash[ 5] = rotateLeft(last, 28); last = one;
    one = m_hash[16]; m_hash[16] = rotateLeft(last, 36); last = one;
    one = m_hash[ 8]; m_hash[ 8] = rotateLeft(last, 45); last = one;
    one = m_hash[21]; m_hash[21] = rotateLeft(last, 55); last = one;
    one = m_hash[24]; m_hash[24] = rotateLeft(last,  2); last = one;
    one = m_hash[ 4]; m_hash[ 4] = rotateLeft(last, 14); last = one;
    one = m_hash[15]; m_hash[15] = rotateLeft(last, 27); last = one;
    one = m_hash[23]; m_hash[23] = rotateLeft(last, 41); last = one;
    one = m_hash[19]; m_hash[19] = rotateLeft(last, 56); last = one;
    one = m_hash[13]; m_hash[13] = rotateLeft(last,  8); last = one;
    one = m_hash[12]; m_hash[12] = rotateLeft(last, 25); last = one;
    one = m_hash[ 2]; m_hash[ 2] = rotateLeft(last, 43); last = one;
    one = m_hash[20]; m_hash[20] = rotateLeft(last, 62); last = one;
    one = m_hash[14]; m_hash[14] = rotateLeft(last, 18); last = one;
    one = m_hash[22]; m_hash[22] = rotateLeft(last, 39); last = one;
    one = m_hash[ 9]; m_hash[ 9] = rotateLeft(last, 61); last = one;
    one = m_hash[ 6]; m_hash[ 6] = rotateLeft(last, 20); last = one;
                      m_hash[ 1] = rotateLeft(last, 44);
    // Chi: non-linear step on each row of five words; the first two words are
    // saved because they are overwritten before the last two need them
    for (unsigned int j = 0; j < StateSize; j += 5)
    {
      uint64_t one = m_hash[j];
      uint64_t two = m_hash[j + 1];
      m_hash[j]     ^= m_hash[j + 2] & ~two;
      m_hash[j + 1] ^= m_hash[j + 3] & ~m_hash[j + 2];
      m_hash[j + 2] ^= m_hash[j + 4] & ~m_hash[j + 3];
      m_hash[j + 3] ^=      one      & ~m_hash[j + 4];
      m_hash[j + 4] ^=      two      & ~one;
    }
    // Iota: round constant
    m_hash[0] ^= XorMasks[round];
  }
}
/// Feed numBytes bytes starting at data into the hash.
/// Pending bytes from earlier calls are completed to a full block first, then
/// whole blocks are hashed directly from the input, and any tail shorter than
/// a block is kept in m_buffer.
void SHA3::add(const void* data, size_t numBytes)
{
  const uint8_t* input = (const uint8_t*) data;
  // top up a partially filled buffer
  if (m_bufferSize > 0)
  {
    for (; numBytes > 0 && m_bufferSize < m_blockSize; --numBytes)
      m_buffer[m_bufferSize++] = *input++;
  }
  // buffer became full: hash it
  if (m_bufferSize == m_blockSize)
  {
    processBlock(m_buffer);
    m_bufferSize = 0;
    m_numBytes  += m_blockSize;
  }
  // input exhausted ?
  if (numBytes == 0)
    return;
  // hash whole blocks straight from the caller's memory
  for (; numBytes >= m_blockSize; numBytes -= m_blockSize, input += m_blockSize)
  {
    processBlock(input);
    m_numBytes += m_blockSize;
  }
  // stash the tail for the next call
  for (; numBytes > 0; --numBytes)
    m_buffer[m_bufferSize++] = *input++;
}
/// Pad the pending bytes to a full block and absorb it.
/// Padding is 0x06 right after the data, zeros, and 0x80 OR-ed into the last
/// byte of the block (both markers share one byte when only one byte is free).
/// m_bufferSize itself is not modified.
void SHA3::processBuffer()
{
  size_t pos = m_bufferSize;
  // domain/padding start marker
  m_buffer[pos++] = 0x06;
  // zero fill up to the block boundary
  for (; pos < m_blockSize; ++pos)
    m_buffer[pos] = 0;
  // final padding bit in the last byte of the block
  m_buffer[pos - 1] |= 0x80;
  processBlock(m_buffer);
}
std::string SHA3::getHash()
{
  // save hash state
  uint64_t oldHash[StateSize];
  for (unsigned int i = 0; i < StateSize; i++)
    oldHash[i] = m_hash[i];
  processBuffer();
  static const char dec2hex[16 + 1] = "0123456789ABCDEF";
  // number of significant elements in hash (uint64_t)
  unsigned int hashLength = m_bits / 64;
  std::string result;
  result.reserve(m_bits / 4);
  for (unsigned int i = 0; i < hashLength; i++)
    for (unsigned int j = 0; j < 8; j++) // 64 bits => 8 bytes
    {
      // convert a byte to hex
      unsigned char oneByte = (unsigned char) (m_hash[i] >> (8 * j));
      result += dec2hex[oneByte >> 4];
      result += dec2hex[oneByte & 15];
    }
  // SHA3-224's last entry in m_hash provides only 32 bits instead of 64 bits
  unsigned int remainder = m_bits - hashLength * 64;
  unsigned int processed = 0;
  while (processed < remainder)
  {
    // convert a byte to hex
    unsigned char oneByte = (unsigned char) (m_hash[hashLength] >> processed);
    result += dec2hex[oneByte >> 4];
    result += dec2hex[oneByte & 15];
    processed += 8;
  }
  // restore state
  for (unsigned int i = 0; i < StateSize; i++)
    m_hash[i] = oldHash[i];
  return result;
}
// //////////////////////////////////////////////////////////
// md5.h
// Copyright (c) 2014 Stephan Brumme. All rights reserved.
// see http://create.stephan-brumme.com/disclaimer.html
//
/** Usage:
    MD5 md5;
    while (more data available)
      md5.add(pointer to fresh data, number of new bytes);
    std::string myHash3 = md5.getHash();
  */
/// Incremental MD5 hasher: call add() any number of times, then getHash().
class MD5
{
public:
  /// split into 64 byte blocks (=> 512 bits), hash is 16 bytes long
  enum { BlockSize = 512 / 8, HashBytes = 16 };
  /// start with the initial hash values (same as reset())
  MD5();
  /// feed numBytes bytes starting at data; may be called repeatedly
  void add(const void* data, size_t numBytes);
  /// digest of everything added so far as 32 uppercase hex characters
  std::string getHash();
  /// digest of everything added so far as HashBytes raw bytes written to buffer
  void getHash(unsigned char buffer[HashBytes]);
  /// restart: forget all data added so far
  void reset();
private:
  /// hash one full block of BlockSize bytes into m_hash
  void processBlock(const void* data);
  /// pad the pending bytes, append the message length and hash the result
  void processBuffer();
  /// size of processed data in bytes (full blocks only, pending bytes excluded)
  uint64_t m_numBytes;
  /// valid bytes in m_buffer
  size_t   m_bufferSize;
  /// bytes not processed yet
  uint8_t  m_buffer[BlockSize];
  /// number of 32-bit words in the hash
  enum { HashValues = HashBytes / 4 };
  /// hash, stored as integers
  uint32_t m_hash[HashValues];
};
/// construct a hasher in its initial state; all the work is done by reset()
MD5::MD5()
{
  reset();
}
/// restart
void MD5::reset()
{
  m_numBytes   = 0;
  m_bufferSize = 0;
  // according to RFC 1321
  m_hash[0] = 0x67452301;
  m_hash[1] = 0xefcdab89;
  m_hash[2] = 0x98badcfe;
  m_hash[3] = 0x10325476;
}
namespace
{
  // Bitwise mixing functions used by MD5::processBlock(), one per round.

  /// round 1: for every bit, pick y where x is set, z otherwise
  inline uint32_t f1(uint32_t x, uint32_t y, uint32_t z)
  {
    // same result as (x & y) | (~x & z), written with one operation less
    const uint32_t differing = y ^ z;
    return z ^ (x & differing);
  }
  /// round 2: for every bit, pick x where z is set, y otherwise
  inline uint32_t f2(uint32_t x, uint32_t y, uint32_t z)
  {
    // same result as (x & z) | (y & ~z), written with one operation less
    const uint32_t differing = x ^ y;
    return y ^ (z & differing);
  }
  /// round 3: parity of the three inputs
  inline uint32_t f3(uint32_t x, uint32_t y, uint32_t z)
  {
    return x ^ y ^ z;
  }
  /// round 4: y XOR (x OR NOT z)
  inline uint32_t f4(uint32_t x, uint32_t y, uint32_t z)
  {
    return y ^ (x | ~z);
  }
  /// rotate a 32-bit value left by bits; callers pass 1..31
  inline uint32_t rotate(uint32_t value, uint32_t bits)
  {
    const uint32_t high = value << bits;
    const uint32_t low  = value >> (32 - bits);
    return high | low;
  }
#if defined(__BYTE_ORDER) && (__BYTE_ORDER != 0) && (__BYTE_ORDER == __BIG_ENDIAN)
  /// reverse the byte order of a 32-bit value (only compiled on big-endian hosts)
  static inline uint32_t swap(uint32_t x)
  {
#ifdef ANCIENT
    // portable byte reversal for compilers without the bswap builtin
    return (x >> 24) |
          ((x >>  8) & 0x0000FF00) |
          ((x <<  8) & 0x00FF0000) |
           (x << 24);
#else
    return __builtin_bswap32(x);
#endif
  }

#endif
}
/// process 64 bytes
void MD5::processBlock(const void* data)
{
  // get last hash
  uint32_t a = m_hash[0];
  uint32_t b = m_hash[1];
  uint32_t c = m_hash[2];
  uint32_t d = m_hash[3];
  // data represented as 16x 32-bit words
  const uint32_t* words = (uint32_t*) data;
  // computations are little endian, swap data if necessary
#if defined(__BYTE_ORDER) && (__BYTE_ORDER != 0) && (__BYTE_ORDER == __BIG_ENDIAN)
#define MYLITTLEENDIAN(x) swap(x)
#else
#define MYLITTLEENDIAN(x) (x)
#endif
  // first round
  uint32_t word0  = MYLITTLEENDIAN(words[ 0]);
  a = rotate(a + f1(b,c,d) + word0  + 0xd76aa478,  7) + b;
  uint32_t word1  = MYLITTLEENDIAN(words[ 1]);
  d = rotate(d + f1(a,b,c) + word1  + 0xe8c7b756, 12) + a;
  uint32_t word2  = MYLITTLEENDIAN(words[ 2]);
  c = rotate(c + f1(d,a,b) + word2  + 0x242070db, 17) + d;
  uint32_t word3  = MYLITTLEENDIAN(words[ 3]);
  b = rotate(b + f1(c,d,a) + word3  + 0xc1bdceee, 22) + c;
  uint32_t word4  = MYLITTLEENDIAN(words[ 4]);
  a = rotate(a + f1(b,c,d) + word4  + 0xf57c0faf,  7) + b;
  uint32_t word5  = MYLITTLEENDIAN(words[ 5]);
  d = rotate(d + f1(a,b,c) + word5  + 0x4787c62a, 12) + a;
  uint32_t word6  = MYLITTLEENDIAN(words[ 6]);
  c = rotate(c + f1(d,a,b) + word6  + 0xa8304613, 17) + d;
  uint32_t word7  = MYLITTLEENDIAN(words[ 7]);
  b = rotate(b + f1(c,d,a) + word7  + 0xfd469501, 22) + c;
  uint32_t word8  = MYLITTLEENDIAN(words[ 8]);
  a = rotate(a + f1(b,c,d) + word8  + 0x698098d8,  7) + b;
  uint32_t word9  = MYLITTLEENDIAN(words[ 9]);
  d = rotate(d + f1(a,b,c) + word9  + 0x8b44f7af, 12) + a;
  uint32_t word10 = MYLITTLEENDIAN(words[10]);
  c = rotate(c + f1(d,a,b) + word10 + 0xffff5bb1, 17) + d;
  uint32_t word11 = MYLITTLEENDIAN(words[11]);
  b = rotate(b + f1(c,d,a) + word11 + 0x895cd7be, 22) + c;
  uint32_t word12 = MYLITTLEENDIAN(words[12]);
  a = rotate(a + f1(b,c,d) + word12 + 0x6b901122,  7) + b;
  uint32_t word13 = MYLITTLEENDIAN(words[13]);
  d = rotate(d + f1(a,b,c) + word13 + 0xfd987193, 12) + a;
  uint32_t word14 = MYLITTLEENDIAN(words[14]);
  c = rotate(c + f1(d,a,b) + word14 + 0xa679438e, 17) + d;
  uint32_t word15 = MYLITTLEENDIAN(words[15]);
  b = rotate(b + f1(c,d,a) + word15 + 0x49b40821, 22) + c;
  // second round
  a = rotate(a + f2(b,c,d) + word1  + 0xf61e2562,  5) + b;
  d = rotate(d + f2(a,b,c) + word6  + 0xc040b340,  9) + a;
  c = rotate(c + f2(d,a,b) + word11 + 0x265e5a51, 14) + d;
  b = rotate(b + f2(c,d,a) + word0  + 0xe9b6c7aa, 20) + c;
  a = rotate(a + f2(b,c,d) + word5  + 0xd62f105d,  5) + b;
  d = rotate(d + f2(a,b,c) + word10 + 0x02441453,  9) + a;
  c = rotate(c + f2(d,a,b) + word15 + 0xd8a1e681, 14) + d;
  b = rotate(b + f2(c,d,a) + word4  + 0xe7d3fbc8, 20) + c;
  a = rotate(a + f2(b,c,d) + word9  + 0x21e1cde6,  5) + b;
  d = rotate(d + f2(a,b,c) + word14 + 0xc33707d6,  9) + a;
  c = rotate(c + f2(d,a,b) + word3  + 0xf4d50d87, 14) + d;
  b = rotate(b + f2(c,d,a) + word8  + 0x455a14ed, 20) + c;
  a = rotate(a + f2(b,c,d) + word13 + 0xa9e3e905,  5) + b;
  d = rotate(d + f2(a,b,c) + word2  + 0xfcefa3f8,  9) + a;
  c = rotate(c + f2(d,a,b) + word7  + 0x676f02d9, 14) + d;
  b = rotate(b + f2(c,d,a) + word12 + 0x8d2a4c8a, 20) + c;
  // third round
  a = rotate(a + f3(b,c,d) + word5  + 0xfffa3942,  4) + b;
  d = rotate(d + f3(a,b,c) + word8  + 0x8771f681, 11) + a;
  c = rotate(c + f3(d,a,b) + word11 + 0x6d9d6122, 16) + d;
  b = rotate(b + f3(c,d,a) + word14 + 0xfde5380c, 23) + c;
  a = rotate(a + f3(b,c,d) + word1  + 0xa4beea44,  4) + b;
  d = rotate(d + f3(a,b,c) + word4  + 0x4bdecfa9, 11) + a;
  c = rotate(c + f3(d,a,b) + word7  + 0xf6bb4b60, 16) + d;
  b = rotate(b + f3(c,d,a) + word10 + 0xbebfbc70, 23) + c;
  a = rotate(a + f3(b,c,d) + word13 + 0x289b7ec6,  4) + b;
  d = rotate(d + f3(a,b,c) + word0  + 0xeaa127fa, 11) + a;
  c = rotate(c + f3(d,a,b) + word3  + 0xd4ef3085, 16) + d;
  b = rotate(b + f3(c,d,a) + word6  + 0x04881d05, 23) + c;
  a = rotate(a + f3(b,c,d) + word9  + 0xd9d4d039,  4) + b;
  d = rotate(d + f3(a,b,c) + word12 + 0xe6db99e5, 11) + a;
  c = rotate(c + f3(d,a,b) + word15 + 0x1fa27cf8, 16) + d;
  b = rotate(b + f3(c,d,a) + word2  + 0xc4ac5665, 23) + c;
  // fourth round
  a = rotate(a + f4(b,c,d) + word0  + 0xf4292244,  6) + b;
  d = rotate(d + f4(a,b,c) + word7  + 0x432aff97, 10) + a;
  c = rotate(c + f4(d,a,b) + word14 + 0xab9423a7, 15) + d;
  b = rotate(b + f4(c,d,a) + word5  + 0xfc93a039, 21) + c;
  a = rotate(a + f4(b,c,d) + word12 + 0x655b59c3,  6) + b;
  d = rotate(d + f4(a,b,c) + word3  + 0x8f0ccc92, 10) + a;
  c = rotate(c + f4(d,a,b) + word10 + 0xffeff47d, 15) + d;
  b = rotate(b + f4(c,d,a) + word1  + 0x85845dd1, 21) + c;
  a = rotate(a + f4(b,c,d) + word8  + 0x6fa87e4f,  6) + b;
  d = rotate(d + f4(a,b,c) + word15 + 0xfe2ce6e0, 10) + a;
  c = rotate(c + f4(d,a,b) + word6  + 0xa3014314, 15) + d;
  b = rotate(b + f4(c,d,a) + word13 + 0x4e0811a1, 21) + c;
  a = rotate(a + f4(b,c,d) + word4  + 0xf7537e82,  6) + b;
  d = rotate(d + f4(a,b,c) + word11 + 0xbd3af235, 10) + a;
  c = rotate(c + f4(d,a,b) + word2  + 0x2ad7d2bb, 15) + d;
  b = rotate(b + f4(c,d,a) + word9  + 0xeb86d391, 21) + c;
  // update hash
  m_hash[0] += a;
  m_hash[1] += b;
  m_hash[2] += c;
  m_hash[3] += d;
}
/// add arbitrary number of bytes
/// Pending bytes from earlier calls are completed to a full block first, then
/// whole blocks are hashed directly from the input, and a tail shorter than
/// BlockSize is kept in m_buffer.
void MD5::add(const void* data, size_t numBytes)
{
  const uint8_t* input = (const uint8_t*) data;
  // top up a partially filled buffer
  if (m_bufferSize > 0)
  {
    for (; numBytes > 0 && m_bufferSize < BlockSize; --numBytes)
      m_buffer[m_bufferSize++] = *input++;
  }
  // buffer became full: hash it
  if (m_bufferSize == BlockSize)
  {
    processBlock(m_buffer);
    m_bufferSize = 0;
    m_numBytes  += BlockSize;
  }
  // input exhausted ?
  if (numBytes == 0)
    return;
  // hash whole blocks straight from the caller's memory
  for (; numBytes >= BlockSize; numBytes -= BlockSize, input += BlockSize)
  {
    processBlock(input);
    m_numBytes += BlockSize;
  }
  // stash the tail for the next call
  for (; numBytes > 0; --numBytes)
    m_buffer[m_bufferSize++] = *input++;
}
/// process final block, less than 64 bytes
/// Padding scheme: a single "1" bit (byte 128), then "0" bits until the length
/// in bits is 448 modulo 512, then the total message length in bits as a
/// 64-bit little-endian integer. If the padding does not fit into m_buffer it
/// spills into a second, local block.
void MD5::processBuffer()
{
  // length in bits after the mandatory "1" bit, rounded up to 448 mod 512
  size_t paddedBits = m_bufferSize * 8 + 1;
  const size_t bitsInBlock = paddedBits & 511;
  paddedBits += (bitsInBlock <= 448) ?       448 - bitsInBlock
                                     : 512 + 448 - bitsInBlock;
  // position (in bytes) where the 64-bit length field starts
  const size_t paddedLength = paddedBits / 8;
  // second block, only used when the padding overflows m_buffer
  unsigned char extra[BlockSize];
  // the "1" bit: 128 => binary 10000000
  if (m_bufferSize < BlockSize)
    m_buffer[m_bufferSize] = 128;
  else
    extra[0] = 128;
  // zero fill: first the rest of m_buffer, then the used part of extra
  size_t pos = m_bufferSize + 1;
  for (; pos < BlockSize; pos++)
    m_buffer[pos] = 0;
  for (; pos < paddedLength; pos++)
    extra[pos - BlockSize] = 0;
  // total message length in bits
  uint64_t msgBits = 8 * (m_numBytes + m_bufferSize);
  // the length field lives in whichever block paddedLength points into
  unsigned char* lengthField = (paddedLength < BlockSize)
                             ? m_buffer + paddedLength
                             : extra + paddedLength - BlockSize;
  // store it least significant byte first
  for (int k = 0; k < 8; k++)
  {
    lengthField[k] = msgBits & 0xFF;
    msgBits >>= 8;
  }
  // hash the padded data
  processBlock(m_buffer);
  // spilled over into the second block ?
  if (paddedLength > BlockSize)
    processBlock(extra);
}
/// return latest hash as 32 hex characters
std::string MD5::getHash()
{
  // compute hash (as raw bytes)
  unsigned char rawHash[HashBytes];
  getHash(rawHash);
  // convert to hex string
  std::string result;
  result.reserve(2 * HashBytes);
  for (int i = 0; i < HashBytes; i++)
  {
    static const char dec2hex[16+1] = "0123456789ABCDEF";
    result += dec2hex[(rawHash[i] >> 4) & 15];
    result += dec2hex[ rawHash[i]       & 15];
  }
  return result;
}
/// return latest hash as bytes
void MD5::getHash(unsigned char buffer[MD5::HashBytes])
{
  // save old hash if buffer is partially filled
  uint32_t oldHash[HashValues];
  for (int i = 0; i < HashValues; i++)
    oldHash[i] = m_hash[i];
  // process remaining bytes
  processBuffer();
  unsigned char* current = buffer;
  for (int i = 0; i < HashValues; i++)
  {
    *current++ =  m_hash[i]        & 0xFF;
    *current++ = (m_hash[i] >>  8) & 0xFF;
    *current++ = (m_hash[i] >> 16) & 0xFF;
    *current++ = (m_hash[i] >> 24) & 0xFF;
    // restore old hash
    m_hash[i] = oldHash[i];
  }
}
/// LICENSE_END.11



/// LICENSE_START.15
#if !defined(HWBLAKE3)
// Slightly reworked to compile as C++ (and on FreeBSD)
// Size constants, all in bytes except BLAKE3_MAX_DEPTH.
// key size (the 8 x 32-bit key words copied into a chunk state)
#define BLAKE3_KEY_LEN 32
// size of one chaining value / default output (used to size the cv stack)
#define BLAKE3_OUT_LEN 32
// size of one compression block (the chunk state's input buffer)
#define BLAKE3_BLOCK_LEN 64
// size of one chunk of input
#define BLAKE3_CHUNK_LEN 1024
// bounds the number of entries in the hasher's cv stack (MAX_DEPTH + 1 slots)
#define BLAKE3_MAX_DEPTH 54
// State of the chunk currently being hashed.
typedef struct {
  // chaining value: starts as a copy of the key, updated by every block compression
  uint32_t cv[8];
  // counter of this chunk, passed to every compression
  uint64_t chunk_counter;
  // input bytes not compressed yet (zero padded)
  uint8_t buf[BLAKE3_BLOCK_LEN];
  // number of valid bytes in buf
  uint8_t buf_len;
  // number of blocks of this chunk compressed so far
  uint8_t blocks_compressed;
  // base flags OR-ed into every compression of this chunk
  uint8_t flags;
} blake3_chunk_state;
// Complete incremental hasher state.
typedef struct {
  // key words (8 x 32 bit)
  uint32_t key[8];
  // state of the chunk currently being filled
  blake3_chunk_state chunk;
  // number of chaining values currently held in cv_stack
  // NOTE(review): inferred from the name; the code using it is outside this section
  uint8_t cv_stack_len;
  // storage for up to BLAKE3_MAX_DEPTH + 1 chaining values of BLAKE3_OUT_LEN bytes each
  uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
} blake3_hasher;
// Public hasher API (definitions are elsewhere in the file).
// Prepare a hasher for a new message.
void blake3_hasher_init(blake3_hasher *self);
// Feed input_len bytes starting at input.
void blake3_hasher_update(blake3_hasher *self, const void *input,
                          size_t input_len);
// Write out_len bytes of output to out; self is const, so hashing can continue.
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
                            size_t out_len);
// Like blake3_hasher_finalize, with an extra seek argument.
// NOTE(review): seek is presumably the byte offset into the output stream at
// which to start — confirm against the definition.
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
                                 uint8_t *out, size_t out_len);
// Bit flags passed to the compression function; values may be OR-ed together.
enum blake3_flags {
  // set while no block of the current chunk has been compressed yet
  CHUNK_START         = 1 << 0,
  // set on the output of a chunk state (its last block)
  CHUNK_END           = 1 << 1,
  // set on outputs built from two child chaining values
  PARENT              = 1 << 2,
  // set when producing the final output bytes
  ROOT                = 1 << 3,
  // NOTE(review): the three flags below are not used in this section;
  // presumably they select the keyed-hash and key-derivation modes — confirm.
  KEYED_HASH          = 1 << 4,
  DERIVE_KEY_CONTEXT  = 1 << 5,
  DERIVE_KEY_MATERIAL = 1 << 6
};
// Maximum number of chunks handed to blake3_hash_many in one call; it sizes
// the chunk pointer array in compress_chunks_parallel. 1 here (portable build).
#define MAX_SIMD_DEGREE 1
// The same value, but never smaller than 2.
#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
// Eight 32-bit initialization constants.
// NOTE(review): presumably the BLAKE3 initialization vector (default key and
// compression constants); its uses are outside this section — confirm.
static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL,
                               0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL,
                               0x1F83D9ABUL, 0x5BE0CD19UL};
// Seven rows, each a permutation of the indices 0..15; row 0 is the identity.
// NOTE(review): presumably row r gives the order in which the 16 message words
// are consumed in compression round r; the code using it is outside this
// section — confirm.
static const uint8_t MSG_SCHEDULE[7][16] = {
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
    {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
    {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
    {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
    {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
    {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
};
// Index (0..63) of the most significant set bit of x.
// x must be non-zero: the builtin/intrinsic variants have no defined result
// for 0 (the portable fallback happens to return 0).
static unsigned int highest_one(uint64_t x) {
#if defined(__GNUC__) || defined(__clang__)
  // leading-zero count is 0..63, so 63 - clz is the index of the top bit
  return (unsigned int)(63 - __builtin_clzll(x));
#elif defined(_MSC_VER) && defined(IS_X86_64)
  unsigned long index;
  _BitScanReverse64(&index, x);
  return index;
#elif defined(_MSC_VER) && defined(IS_X86_32)
  // 32-bit MSVC: scan the upper half first, fall back to the lower half
  unsigned long index;
  if(x >> 32) {
    _BitScanReverse(&index, x >> 32);
    return 32 + index;
  }
  _BitScanReverse(&index, x);
  return index;
#else
  // portable binary search: test the upper half of the remaining range and
  // halve the range each step (32, 16, 8, 4, 2, 1)
  unsigned int position = 0;
  unsigned int step;
  for (step = 32; step != 0; step >>= 1) {
    if (x >> step) {
      x >>= step;
      position += step;
    }
  }
  return position;
#endif
}
// Number of set bits in x.
INLINE_divsuf unsigned int popcnt(uint64_t x) {
#if defined(__GNUC__) || defined(__clang__)
  return __builtin_popcountll(x);
#else
  // each iteration clears the lowest set bit, so the loop runs once per set bit
  unsigned int bits_set = 0;
  for (; x != 0; x &= x - 1) {
    bits_set += 1;
  }
  return bits_set;
#endif
}
// Largest power of two that is <= x; returns 1 for x == 0, because bit 0 is
// forced on before the top bit is looked up.
INLINE_divsuf uint64_t round_down_to_power_of_2(uint64_t x) {
  const unsigned int top_bit = highest_one(x | 1);
  return 1ULL << top_bit;
}
// Lower 32 bits of a 64-bit counter.
INLINE_divsuf uint32_t counter_low(uint64_t counter) {
  return (uint32_t)counter;
}
// Upper 32 bits of a 64-bit counter.
INLINE_divsuf uint32_t counter_high(uint64_t counter) {
  const uint64_t upper = counter >> 32;
  return (uint32_t)upper;
}
// Reads 4 bytes at src as a little-endian 32-bit value (src[0] is the least
// significant byte). Works for any alignment.
INLINE_divsuf uint32_t load32(const void *src) {
  const uint8_t *p = (const uint8_t *)src;
  uint32_t value = 0;
  int k;
  // fold from the most significant byte down
  for (k = 3; k >= 0; --k) {
    value = (value << 8) | (uint32_t)p[k];
  }
  return value;
}
// Writes w to the 4 bytes at dst in little-endian order (least significant
// byte first). Works for any alignment.
INLINE_divsuf void store32(void *dst, uint32_t w) {
  uint8_t *p = (uint8_t *)dst;
  int k;
  for (k = 0; k < 4; ++k) {
    p[k] = (uint8_t)(w >> (8 * k));
  }
}
// Serializes the 8 chaining-value words into 32 bytes, each word stored
// little-endian, word 0 first.
INLINE_divsuf void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
  int word;
  for (word = 0; word < 8; ++word) {
    store32(&bytes_out[word * 4], cv_words[word]);
  }
}
// Compression primitives (definitions are elsewhere in the file).
// Compress one block into cv, overwriting cv with the new chaining value.
void blake3_compress_in_place(uint32_t cv[8],
                              const uint8_t block[BLAKE3_BLOCK_LEN],
                              uint8_t block_len, uint64_t counter,
                              uint8_t flags);
// Compress one block and write 64 bytes of output to out; cv is not modified.
void blake3_compress_xof(const uint32_t cv[8],
                         const uint8_t block[BLAKE3_BLOCK_LEN],
                         uint8_t block_len, uint64_t counter, uint8_t flags,
                         uint8_t out[64]);
// Hash num_inputs inputs of `blocks` blocks each and write the results to out.
// flags_start / flags_end are extra flags for the first / last block of each
// input (compress_chunks_parallel passes CHUNK_START / CHUNK_END).
// NOTE(review): out presumably receives one 32-byte chaining value per input,
// and increment_counter presumably bumps the counter per input — confirm.
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
                      size_t blocks, const uint32_t key[8], uint64_t counter,
                      bool increment_counter, uint8_t flags,
                      uint8_t flags_start, uint8_t flags_end, uint8_t *out);
// NOTE(review): presumably the number of inputs blake3_hash_many handles in
// parallel on this build — confirm against the definition.
size_t blake3_simd_degree(void);
// Portable variants of the three primitives above, same signatures.
void blake3_compress_in_place_portable(uint32_t cv[8],
                                       const uint8_t block[BLAKE3_BLOCK_LEN],
                                       uint8_t block_len, uint64_t counter,
                                       uint8_t flags);
void blake3_compress_xof_portable(const uint32_t cv[8],
                                  const uint8_t block[BLAKE3_BLOCK_LEN],
                                  uint8_t block_len, uint64_t counter,
                                  uint8_t flags, uint8_t out[64]);
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
                               size_t blocks, const uint32_t key[8],
                               uint64_t counter, bool increment_counter,
                               uint8_t flags, uint8_t flags_start,
                               uint8_t flags_end, uint8_t *out);
// Sets up a chunk state for chunk 0: chaining value = key, empty zeroed
// buffer, no blocks compressed, and the given base flags.
INLINE_divsuf void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
                             uint8_t flags) {
  self->flags = flags;
  self->chunk_counter = 0;
  self->blocks_compressed = 0;
  self->buf_len = 0;
  memset(self->buf, 0, BLAKE3_BLOCK_LEN);
  memcpy(self->cv, key, BLAKE3_KEY_LEN);
}
// Re-arms a chunk state for the chunk numbered chunk_counter: chaining value
// = key, empty zeroed buffer, no blocks compressed. The flags are kept.
INLINE_divsuf void chunk_state_reset(blake3_chunk_state *self, const uint32_t key[8],
                              uint64_t chunk_counter) {
  self->chunk_counter = chunk_counter;
  self->blocks_compressed = 0;
  self->buf_len = 0;
  memset(self->buf, 0, BLAKE3_BLOCK_LEN);
  memcpy(self->cv, key, BLAKE3_KEY_LEN);
}
// Number of input bytes this chunk has taken so far: the compressed blocks
// plus the bytes waiting in the buffer.
INLINE_divsuf size_t chunk_state_len(const blake3_chunk_state *self) {
  const size_t compressed_bytes =
      BLAKE3_BLOCK_LEN * (size_t)self->blocks_compressed;
  const size_t buffered_bytes = (size_t)self->buf_len;
  return compressed_bytes + buffered_bytes;
}
// Copies as many input bytes as fit into the free part of the block buffer.
// Returns the number of bytes taken: min(free space, input_len).
INLINE_divsuf size_t chunk_state_fill_buf(blake3_chunk_state *self,
                                   const uint8_t *input, size_t input_len) {
  const size_t used = (size_t)self->buf_len;
  const size_t room = BLAKE3_BLOCK_LEN - used;
  const size_t take = (room > input_len) ? input_len : room;
  memcpy(self->buf + used, input, take);
  self->buf_len = (uint8_t)(used + take);
  return take;
}
// CHUNK_START while no block of this chunk has been compressed yet, else 0.
INLINE_divsuf uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state *self) {
  return (self->blocks_compressed == 0) ? CHUNK_START : 0;
}
// Captured inputs of one pending compression; either a chaining value or
// root output bytes can be produced from it later.
typedef struct {
  // chaining value fed into the compression
  uint32_t input_cv[8];
  // counter argument of the compression
  uint64_t counter;
  // block to compress
  uint8_t block[BLAKE3_BLOCK_LEN];
  // number of valid bytes in block
  uint8_t block_len;
  // flags argument of the compression
  uint8_t flags;
} output_t;
// Bundles the arguments of a pending compression into an output_t (by value);
// the chaining value and the block are copied.
INLINE_divsuf output_t make_output(const uint32_t input_cv[8],
                            const uint8_t block[BLAKE3_BLOCK_LEN],
                            uint8_t block_len, uint64_t counter,
                            uint8_t flags) {
  output_t result;
  result.flags = flags;
  result.counter = counter;
  result.block_len = block_len;
  memcpy(result.block, block, sizeof result.block);
  memcpy(result.input_cv, input_cv, sizeof result.input_cv);
  return result;
}
// Runs the pending compression on a scratch copy of the input chaining value
// and writes the resulting 32-byte chaining value to cv.
INLINE_divsuf void output_chaining_value(const output_t *self, uint8_t cv[32]) {
  uint32_t scratch[8];
  memcpy(scratch, self->input_cv, sizeof scratch);
  blake3_compress_in_place(scratch, self->block, self->block_len,
                           self->counter, self->flags);
  store_cv_words(cv, scratch);
}
// Writes out_len bytes of root output to out, starting at byte offset seek of
// the output stream. The stream is produced in 64-byte blocks by compressing
// with the ROOT flag and an increasing block counter; the first block may be
// entered part-way.
INLINE_divsuf void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
                              size_t out_len) {
  uint64_t block_index = seek / 64;
  size_t skip = seek % 64;
  uint8_t wide_buf[64];
  for (; out_len > 0; block_index += 1, skip = 0) {
    blake3_compress_xof(self->input_cv, self->block, self->block_len,
                        block_index, self->flags | ROOT, wide_buf);
    const size_t available = 64 - skip;
    const size_t chunk = (out_len > available) ? available : out_len;
    memcpy(out, wide_buf + skip, chunk);
    out += chunk;
    out_len -= chunk;
  }
}
// Feeds input_len bytes into the current chunk.
// A partially filled buffer is topped up first and compressed only if more
// input follows; then full blocks are compressed directly from the input while
// MORE than one block remains; the rest (up to a whole block) stays in the
// buffer, so the chunk's last block is always still pending afterwards.
INLINE_divsuf void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
                               size_t input_len) {
  if (self->buf_len > 0) {
    const size_t taken = chunk_state_fill_buf(self, input, input_len);
    input += taken;
    input_len -= taken;
    if (input_len > 0) {
      // more input follows, so the buffered block is not the last one
      const uint8_t block_flags =
          (uint8_t)(self->flags | chunk_state_maybe_start_flag(self));
      blake3_compress_in_place(self->cv, self->buf, BLAKE3_BLOCK_LEN,
                               self->chunk_counter, block_flags);
      self->blocks_compressed += 1;
      self->buf_len = 0;
      memset(self->buf, 0, BLAKE3_BLOCK_LEN);
    }
  }
  for (; input_len > BLAKE3_BLOCK_LEN;
       input += BLAKE3_BLOCK_LEN, input_len -= BLAKE3_BLOCK_LEN) {
    const uint8_t block_flags =
        (uint8_t)(self->flags | chunk_state_maybe_start_flag(self));
    blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN,
                             self->chunk_counter, block_flags);
    self->blocks_compressed += 1;
  }
  // whatever is left fits into the buffer; the return value is not needed
  (void)chunk_state_fill_buf(self, input, input_len);
}
// Describe the final compression of this chunk: the buffered block, the
// current chaining value, and the chunk flags with CHUNK_END added.
INLINE_divsuf output_t chunk_state_output(const blake3_chunk_state *self) {
  const uint8_t final_flags =
      (uint8_t)(self->flags | chunk_state_maybe_start_flag(self) | CHUNK_END);
  return make_output(self->cv, self->buf, self->buf_len,
                     self->chunk_counter, final_flags);
}
// Build the output descriptor for a parent node: `block` is passed as the
// full 64-byte message block, `key` as the input chaining value, the counter
// is 0 and the PARENT flag is added to `flags`.
INLINE_divsuf output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN],
                              const uint32_t key[8], uint8_t flags) {
  return make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT);
}
// Number of bytes assigned to the left subtree when splitting `content_len`
// bytes: the count of chunks preceding the last one, rounded down to a power
// of two, times the chunk size. The "- 1" guarantees the right side is never
// empty. Callers pass content_len > 0 (the subtraction would wrap otherwise).
INLINE_divsuf size_t left_len(size_t content_len) {
  const size_t chunks_before_last = (content_len - 1) / BLAKE3_CHUNK_LEN;
  return round_down_to_power_of_2(chunks_before_last) * BLAKE3_CHUNK_LEN;
}
// Hash every whole chunk of `input` with blake3_hash_many and, if a partial
// chunk remains, hash it separately through a temporary chunk state.
// One 32-byte chaining value per chunk is written to `out`; the return value
// is the number of chaining values written.
INLINE_divsuf size_t compress_chunks_parallel(const uint8_t *input, size_t input_len,
                                       const uint32_t key[8],
                                       uint64_t chunk_counter, uint8_t flags,
                                       uint8_t *out) {
#if defined(BLAKE3_TESTING)
  assert(0 < input_len);
  assert(input_len <= MAX_SIMD_DEGREE * BLAKE3_CHUNK_LEN);
#endif
  const uint8_t *chunk_ptrs[MAX_SIMD_DEGREE];
  size_t consumed = 0;
  size_t full_chunks = 0;
  for (; input_len - consumed >= BLAKE3_CHUNK_LEN;
       consumed += BLAKE3_CHUNK_LEN) {
    chunk_ptrs[full_chunks] = input + consumed;
    full_chunks += 1;
  }
  blake3_hash_many(chunk_ptrs, full_chunks,
                   BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, key, chunk_counter,
                   true, flags, CHUNK_START, CHUNK_END, out);
  if (input_len <= consumed) {
    return full_chunks;
  }
  // Trailing partial chunk: its counter follows the whole chunks.
  blake3_chunk_state tail;
  chunk_state_init(&tail, key, flags);
  tail.chunk_counter = chunk_counter + (uint64_t)full_chunks;
  chunk_state_update(&tail, input + consumed, input_len - consumed);
  output_t tail_output = chunk_state_output(&tail);
  output_chaining_value(&tail_output, out + full_chunks * BLAKE3_OUT_LEN);
  return full_chunks + 1;
}
// Hash adjacent pairs of child chaining values into parent chaining values.
//
// `child_chaining_values` holds `num_chaining_values` entries of
// BLAKE3_OUT_LEN bytes each. Each consecutive pair forms one 64-byte parent
// block that is hashed with the PARENT flag; the results are written to
// `out`. If the count is odd, the unpaired last child is copied to `out`
// unchanged after the parents. Returns the number of values written.
INLINE_divsuf size_t compress_parents_parallel(const uint8_t *child_chaining_values,
                                        size_t num_chaining_values,
                                        const uint32_t key[8], uint8_t flags,
                                        uint8_t *out) {
#if defined(BLAKE3_TESTING)
  assert(2 <= num_chaining_values);
  assert(num_chaining_values <= 2 * MAX_SIMD_DEGREE_OR_2);
#endif
  const uint8_t *parents_array[MAX_SIMD_DEGREE_OR_2];
  size_t parents_array_len = 0;
  while (num_chaining_values - (2 * parents_array_len) >= 2) {
    parents_array[parents_array_len] =
        &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN];
    parents_array_len += 1;
  }
  blake3_hash_many(parents_array, parents_array_len, 1, key,
                   0,              // parents always use counter 0
                   false, flags | PARENT,
                   0,              // parents have no start flags
                   0,              // parents have no end flags
                   out);
  if (num_chaining_values > 2 * parents_array_len) {
    // Odd child left over: pass it through untouched.
    // The offsets must be applied to the buffers themselves. The previous
    // `&out + n` / `&child_chaining_values + n` form did arithmetic on the
    // ADDRESS of the pointer parameters (type uint8_t **), so the copy would
    // have touched stack memory near the parameters instead of the buffers.
    // If a compiler emits a spurious bounds warning here, silence it with a
    // diagnostic pragma rather than by changing these expressions.
    memcpy(out + parents_array_len * BLAKE3_OUT_LEN,
           child_chaining_values + 2 * parents_array_len * BLAKE3_OUT_LEN,
           BLAKE3_OUT_LEN);
    return parents_array_len + 1;
  } else {
    return parents_array_len;
  }
}
// Recursively hash `input` as a subtree and write the resulting chaining
// values (BLAKE3_OUT_LEN bytes each) to `out`. Returns how many chaining
// values were written. `chunk_counter` is the index of the first chunk of
// `input`.
static size_t blake3_compress_subtree_wide(const uint8_t *input,
                                           size_t input_len,
                                           const uint32_t key[8],
                                           uint64_t chunk_counter,
                                           uint8_t flags, uint8_t *out) {
  // Base case: the input fits in one batch of chunks, hash them directly.
  if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) {
    return compress_chunks_parallel(input, input_len, key, chunk_counter, flags,
                                    out);
  }
  // Split into a left part (size chosen by left_len) and the remainder; the
  // right part's chunk counter continues after the chunks of the left part.
  size_t left_input_len = left_len(input_len);
  size_t right_input_len = input_len - left_input_len;
  const uint8_t *right_input = &input[left_input_len];
  uint64_t right_chunk_counter =
      chunk_counter + (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN);
  // Scratch space for the children's chaining values: left results start at
  // offset 0, right results start `degree` entries later.
  uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
  size_t degree = blake3_simd_degree();
  // With a degree of 1 a left side longer than one chunk is given two slots,
  // so the right results are placed after them.
  if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {
    degree = 2;
  }
  uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];
  size_t left_n = blake3_compress_subtree_wide(input, left_input_len, key,
                                               chunk_counter, flags, cv_array);
  size_t right_n = blake3_compress_subtree_wide(
      right_input, right_input_len, key, right_chunk_counter, flags, right_cvs);
  // A single left value means the two children are returned as-is (two
  // adjacent values) instead of being merged into one parent here.
  if (left_n == 1) {
    memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
    return 2;
  }
  // Otherwise condense the children one level up the tree.
  size_t num_chaining_values = left_n + right_n;
  return compress_parents_parallel(cv_array, num_chaining_values, key, flags,
                                   out); /// FAKE WARNING
}
// Hash `input` as a subtree and reduce the result to exactly two chaining
// values (one parent node's worth, 2 * BLAKE3_OUT_LEN bytes) stored in `out`.
INLINE_divsuf void compress_subtree_to_parent_node(
    const uint8_t *input, size_t input_len, const uint32_t key[8],
    uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) {
#if defined(BLAKE3_TESTING)
  assert(input_len > BLAKE3_CHUNK_LEN);
#endif
  uint8_t cvs[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
  uint8_t scratch[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN ];
  size_t remaining = blake3_compress_subtree_wide(input, input_len, key,
                                                  chunk_counter, flags, cvs);
  // Keep merging pairs until only two chaining values are left.
  while (remaining > 2) {
    remaining = compress_parents_parallel(cvs, remaining, key, flags, scratch);
    memcpy(cvs, scratch, remaining * BLAKE3_OUT_LEN);
  }
  memcpy(out, cvs, 2 * BLAKE3_OUT_LEN);
}
// Common initializer: start with an empty chaining-value stack, remember the
// key words, and set up the first chunk state with that key and `flags`.
INLINE_divsuf void hasher_init_base(blake3_hasher *self, const uint32_t key[8],
                             uint8_t flags) {
  self->cv_stack_len = 0;
  memcpy(self->key, key, BLAKE3_KEY_LEN);
  chunk_state_init(&self->chunk, key, flags);
}
// Initialize `self` using IV as the key words and no flags.
void blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); }
// Merge the two topmost stack entries into their parent, repeatedly, until
// the stack holds no more entries than `total_len` has 1-bits.
INLINE_divsuf void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
  const size_t target_len = (size_t)popcnt(total_len);
  while (self->cv_stack_len > target_len) {
    // The top two entries are adjacent, i.e. already laid out as one
    // 64-byte parent block; the result overwrites the lower of the two.
    uint8_t *top_pair =
        self->cv_stack + (self->cv_stack_len - 2) * BLAKE3_OUT_LEN;
    output_t merged = parent_output(top_pair, self->key, self->chunk.flags);
    output_chaining_value(&merged, top_pair);
    self->cv_stack_len -= 1;
  }
}
// Push a new chaining value. Before pushing, the stack is merged down
// according to `chunk_counter` (see hasher_merge_cv_stack).
INLINE_divsuf void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
                           uint64_t chunk_counter) {
  hasher_merge_cv_stack(self, chunk_counter);
  uint8_t *slot = self->cv_stack + (size_t)self->cv_stack_len * BLAKE3_OUT_LEN;
  memcpy(slot, new_cv, BLAKE3_OUT_LEN);
  self->cv_stack_len += 1;
}
// Feed `input_len` bytes of `input` into the hasher. May be called any
// number of times; an empty input is a no-op.
void blake3_hasher_update(blake3_hasher *self, const void *input,
                          size_t input_len) {
  if (input_len == 0) {
    return;
  }
  const uint8_t *input_bytes = (const uint8_t *)input;
  // Step 1: if a chunk is already in progress, fill it up first.
  if (chunk_state_len(&self->chunk) > 0) {
    size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&self->chunk);
    if (take > input_len) {
      take = input_len;
    }
    chunk_state_update(&self->chunk, input_bytes, take);
    input_bytes += take;
    input_len -= take;
    // Only finalize the chunk when more input follows it; otherwise it is
    // left pending in the chunk state and we are done.
    if (input_len > 0) {
      output_t output = chunk_state_output(&self->chunk);
      uint8_t chunk_cv[32];
      output_chaining_value(&output, chunk_cv);
      hasher_push_cv(self, chunk_cv, self->chunk.chunk_counter);
      chunk_state_reset(&self->chunk, self->key, self->chunk.chunk_counter + 1);
    } else {
      return;
    }
  }
  // Step 2: while more than one chunk of input remains, hash whole subtrees.
  while (input_len > BLAKE3_CHUNK_LEN) {
    // Start with the largest power-of-two length not exceeding the input...
    size_t subtree_len = round_down_to_power_of_2(input_len);
    uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN;
    // ...and halve it until the bytes hashed so far are a multiple of it.
    while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {
      subtree_len /= 2;
    }
    uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN;
    if (subtree_len <= BLAKE3_CHUNK_LEN) {
      // Single chunk: hash it with a temporary chunk state and push its CV.
      blake3_chunk_state chunk_state;
      chunk_state_init(&chunk_state, self->key, self->chunk.flags);
      chunk_state.chunk_counter = self->chunk.chunk_counter;
      chunk_state_update(&chunk_state, input_bytes, subtree_len);
      output_t output = chunk_state_output(&chunk_state);
      uint8_t cv[BLAKE3_OUT_LEN];
      output_chaining_value(&output, cv);
      hasher_push_cv(self, cv, chunk_state.chunk_counter);
    } else {
      // Several chunks: reduce the subtree to two chaining values and push
      // them separately; the second one is pushed with the counter advanced
      // by half the subtree's chunks.
      uint8_t cv_pair[2 * BLAKE3_OUT_LEN];
      compress_subtree_to_parent_node(input_bytes, subtree_len, self->key,
                                      self->chunk.chunk_counter,
                                      self->chunk.flags, cv_pair); /// FAKE COMPILER WARNING HERE!
      hasher_push_cv(self, cv_pair, self->chunk.chunk_counter);
      hasher_push_cv(self, &cv_pair[BLAKE3_OUT_LEN],
                     self->chunk.chunk_counter + (subtree_chunks / 2));
    }
    self->chunk.chunk_counter += subtree_chunks;
    input_bytes += subtree_len;
    input_len -= subtree_len;
  }
  // Step 3: at most one chunk of input is left; buffer it in the chunk state
  // and merge the stack for the current chunk counter.
  if (input_len > 0) {
    chunk_state_update(&self->chunk, input_bytes, input_len);
    hasher_merge_cv_stack(self, self->chunk.chunk_counter);
  }
}
// Write `out_len` bytes of hash output to `out`, starting at output offset 0.
// The hasher is taken as const and is not modified.
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
                            size_t out_len) {
  blake3_hasher_finalize_seek(self, 0, out, out_len);
}
// Write `out_len` bytes of hash output to `out`, starting at byte offset
// `seek` of the output stream. The hasher is taken as const and is not
// modified; a zero `out_len` is a no-op.
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
                                 uint8_t *out, size_t out_len) {
  if (out_len == 0) {
    return;
  }
  // Empty stack: the pending chunk is itself the root.
  if (self->cv_stack_len == 0) {
    output_t output = chunk_state_output(&self->chunk);
    output_root_bytes(&output, seek, out, out_len);
    return;
  }
  // Otherwise pick the starting node, then fold in the stack entries from
  // the top down.
  output_t output;
  size_t cvs_remaining;
  if (chunk_state_len(&self->chunk) > 0) {
    // A chunk is pending: start from it, all stack entries remain to merge.
    cvs_remaining = self->cv_stack_len;
    output = chunk_state_output(&self->chunk);
  } else {
    // No pending chunk data: start from the parent of the top two entries.
    cvs_remaining = self->cv_stack_len - 2;
    output = parent_output(&self->cv_stack[cvs_remaining * 32], self->key,
                           self->chunk.flags);
  }
  while (cvs_remaining > 0) {
    cvs_remaining -= 1;
    // Parent block = stack entry (first 32 bytes) followed by the chaining
    // value of the node built so far (last 32 bytes).
    uint8_t parent_block[BLAKE3_BLOCK_LEN];
    memcpy(parent_block, &self->cv_stack[cvs_remaining * 32], 32);
    output_chaining_value(&output, &parent_block[32]);
    output = parent_output(parent_block, self->key, self->chunk.flags);
  }
  output_root_bytes(&output, seek, out, out_len);
}
// Evaluate `x` and discard the result (cast to void); used to mark a value
// as intentionally unused.
#define MAYBE_UNUSED(x) (void)((x))
#if defined(IS_X86)
// Execute XGETBV with ECX = 0 and return the 64-bit result (EDX:EAX).
// MSVC uses the _xgetbv intrinsic; other compilers use inline assembly.
static uint64_t xgetbv() {
#if defined(_MSC_VER)
  return _xgetbv(0);
#else
  uint32_t eax = 0, edx = 0;
  __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
  return ((uint64_t)edx << 32) | eax;
#endif
}
// Execute CPUID for leaf `id` and store EAX, EBX, ECX, EDX in out[0..3].
static void cpuid(uint32_t out[4], uint32_t id) {
#if defined(_MSC_VER)
  __cpuid((int *)out, id);
#elif defined(__i386__) || defined(_M_IX86)
  // 32-bit x86: EBX is not named as an output. Its old value is parked in a
  // scratch register and swapped back after CPUID, which leaves CPUID's EBX
  // in the scratch register (presumably because EBX may be reserved, e.g.
  // for PIC - confirm).
  __asm__ __volatile__("movl %%ebx, %1\n"
                       "cpuid\n"
                       "xchgl %1, %%ebx\n"
                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id));
#endif
}
// Execute CPUID for leaf `id`, sub-leaf `sid` (passed in ECX) and store
// EAX, EBX, ECX, EDX in out[0..3].
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
#if defined(_MSC_VER)
  __cpuidex((int *)out, id, sid);
#elif defined(__i386__) || defined(_M_IX86)
  // Same EBX save/restore sequence as in cpuid() for 32-bit x86.
  __asm__ __volatile__("movl %%ebx, %1\n"
                       "cpuid\n"
                       "xchgl %1, %%ebx\n"
                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id), "c"(sid));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id), "c"(sid));
#endif
}
#endif
// Compress one block, updating `cv` in place. In this build the call is
// forwarded unconditionally to the portable implementation.
void blake3_compress_in_place(uint32_t cv[8],
                              const uint8_t block[BLAKE3_BLOCK_LEN],
                              uint8_t block_len, uint64_t counter,
                              uint8_t flags) {
  blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
}
// Compress one block and write the 64-byte extended output to `out`; `cv`
// is not modified. Forwarded unconditionally to the portable implementation.
void blake3_compress_xof(const uint32_t cv[8],
                         const uint8_t block[BLAKE3_BLOCK_LEN],
                         uint8_t block_len, uint64_t counter, uint8_t flags,
                         uint8_t out[64]) {
  blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
}
// Hash `num_inputs` equally sized inputs of `blocks` blocks each, writing one
// BLAKE3_OUT_LEN-byte result per input to `out`. Forwarded unconditionally
// to the portable implementation.
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
                      size_t blocks, const uint32_t key[8], uint64_t counter,
                      bool increment_counter, uint8_t flags,
                      uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
                            increment_counter, flags, flags_start, flags_end,
                            out);
}
// Number of chunks hashed per batch. This build always reports 1.
size_t blake3_simd_degree(void)
{
  return 1;
}
// Rotate the 32-bit word `w` right by `c` bits. `c` must be in 1..31:
// with c == 0 the left shift would be by 32, which is undefined in C.
INLINE_divsuf uint32_t rotr32(uint32_t w, uint32_t c) {
  return (w >> c) | (w << (32 - c));
}
// Mixing function: updates the four state words at indices a, b, c, d using
// the two message words x and y. Additions wrap modulo 2^32 (uint32_t); the
// rotation amounts are 16, 12, 8 and 7. Statement order is significant.
INLINE_divsuf void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
              uint32_t x, uint32_t y) {
  state[a] = state[a] + state[b] + x;
  state[d] = rotr32(state[d] ^ state[a], 16);
  state[c] = state[c] + state[d];
  state[b] = rotr32(state[b] ^ state[c], 12);
  state[a] = state[a] + state[b] + y;
  state[d] = rotr32(state[d] ^ state[a], 8);
  state[c] = state[c] + state[d];
  state[b] = rotr32(state[b] ^ state[c], 7);
}
// One round: eight applications of g() over the 16-word state. Message words
// are picked from `msg` in the order given by MSG_SCHEDULE[round].
INLINE_divsuf void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
  const uint8_t *schedule = MSG_SCHEDULE[round];
  // Index groups with stride 4 (columns of the state viewed as a 4x4 matrix).
  g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
  g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
  g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
  g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
  // Index groups along the diagonals of the same 4x4 view.
  g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
  g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
  g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
  g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
}
// Shared core of the portable compression functions: load the 64-byte block
// as sixteen little-endian words, set up the 16-word state and run the seven
// rounds. The caller derives its output from `state` afterwards.
//
// State layout: [0..7] = cv, [8..11] = IV[0..3], [12] / [13] = low / high
// half of the counter, [14] = block_len, [15] = flags.
INLINE_divsuf void compress_pre(uint32_t state[16], const uint32_t cv[8],
                         const uint8_t block[BLAKE3_BLOCK_LEN],
                         uint8_t block_len, uint64_t counter, uint8_t flags) {
  uint32_t m[16];
  size_t i;
  // All loops below have constant trip counts.
  for (i = 0; i < 16; i++) {
    m[i] = load32(block + 4 * i);
  }
  for (i = 0; i < 8; i++) {
    state[i] = cv[i];
  }
  for (i = 0; i < 4; i++) {
    state[8 + i] = IV[i];
  }
  state[12] = counter_low(counter);
  state[13] = counter_high(counter);
  state[14] = (uint32_t)block_len;
  state[15] = (uint32_t)flags;
  for (i = 0; i < 7; i++) {
    round_fn(state, &m[0], i);
  }
}
// Portable compression that overwrites `cv` with the new chaining value:
// each output word is the XOR of a word from the first half of the final
// state with the matching word of the second half.
void blake3_compress_in_place_portable(uint32_t cv[8],
                                       const uint8_t block[BLAKE3_BLOCK_LEN],
                                       uint8_t block_len, uint64_t counter,
                                       uint8_t flags) {
  uint32_t s[16];
  compress_pre(s, cv, block, block_len, counter, flags);
  for (size_t i = 0; i < 8; i++) {
    cv[i] = s[i] ^ s[i + 8];
  }
}
// Portable compression producing the full 64-byte output in `out`, stored as
// sixteen little-endian words. Words 0..7 are state[i] ^ state[i + 8];
// words 8..15 are state[i + 8] ^ cv[i]. `cv` itself is left unchanged.
void blake3_compress_xof_portable(const uint32_t cv[8],
                                  const uint8_t block[BLAKE3_BLOCK_LEN],
                                  uint8_t block_len, uint64_t counter,
                                  uint8_t flags, uint8_t out[64]) {
  uint32_t s[16];
  size_t i;
  compress_pre(s, cv, block, block_len, counter, flags);
  // Two separate passes keep the original store order (low half first).
  for (i = 0; i < 8; i++) {
    store32(&out[i * 4], s[i] ^ s[i + 8]);
  }
  for (i = 0; i < 8; i++) {
    store32(&out[(i + 8) * 4], s[i + 8] ^ cv[i]);
  }
}
// Hash one input of `blocks` consecutive 64-byte blocks, starting from `key`
// as the chaining value, and store the 32-byte result in `out`.
// `flags_start` is added to the first block only and `flags_end` to the last
// block only (a single block receives both).
INLINE_divsuf void hash_one_portable(const uint8_t *input, size_t blocks,
                              const uint32_t key[8], uint64_t counter,
                              uint8_t flags, uint8_t flags_start,
                              uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
  uint32_t chain[8];
  memcpy(chain, key, BLAKE3_KEY_LEN);
  uint8_t cur_flags = flags | flags_start;
  for (; blocks > 0;
       blocks -= 1, input += BLAKE3_BLOCK_LEN, cur_flags = flags) {
    if (blocks == 1) {
      cur_flags |= flags_end;
    }
    blake3_compress_in_place_portable(chain, input, BLAKE3_BLOCK_LEN, counter,
                                      cur_flags);
  }
  store_cv_words(out, chain);
}
// Hash `num_inputs` inputs one after another with hash_one_portable. Result
// i is written at out + i * BLAKE3_OUT_LEN. When `increment_counter` is set,
// the counter grows by one per input; otherwise every input uses `counter`.
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
                               size_t blocks, const uint32_t key[8],
                               uint64_t counter, bool increment_counter,
                               uint8_t flags, uint8_t flags_start,
                               uint8_t flags_end, uint8_t *out) {
  for (size_t i = 0; i < num_inputs; i++) {
    hash_one_portable(inputs[i], blocks, key, counter, flags, flags_start,
                      flags_end, out + i * BLAKE3_OUT_LEN);
    if (increment_counter) {
      counter += 1;
    }
  }
}
#else
// Version string and the fixed sizes (in bytes) used throughout this code.
#define BLAKE3_VERSION_STRING "1.0.0"
#define BLAKE3_KEY_LEN 32
#define BLAKE3_OUT_LEN 32
#define BLAKE3_BLOCK_LEN 64
#define BLAKE3_CHUNK_LEN 1024
// Sizes the hasher's chaining-value stack: (BLAKE3_MAX_DEPTH + 1) entries.
#define BLAKE3_MAX_DEPTH 54
// State of the chunk currently being hashed.
// NOTE(review): the field roles below are taken from how the same-named
// fields are used by the chunk_state_* helpers - confirm against them.
typedef struct {
  uint32_t cv[8];             // running chaining value
  uint64_t chunk_counter;     // index of this chunk
  uint8_t buf[BLAKE3_BLOCK_LEN]; // bytes of the block not yet compressed
  uint8_t buf_len;            // number of valid bytes in buf
  uint8_t blocks_compressed;  // blocks of this chunk compressed so far
  uint8_t flags;              // flags applied to every compression
} blake3_chunk_state;
// Incremental hasher state: the key words, the chunk in progress, and a
// stack of BLAKE3_OUT_LEN-byte chaining values with room for
// BLAKE3_MAX_DEPTH + 1 entries.
typedef struct {
  uint32_t key[8];
  blake3_chunk_state chunk;
  uint8_t cv_stack_len;   // number of entries currently on cv_stack
  uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
} blake3_hasher;
// Public hasher interface. Typical use: one of the init functions, any
// number of update calls, then finalize / finalize_seek (both take the
// hasher as const).
void blake3_hasher_init(blake3_hasher *self);
// Key-derivation initializers; `context` is a C string in the first form and
// a buffer of `context_len` bytes in the raw form.
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
                                       size_t context_len);
void blake3_hasher_update(blake3_hasher *self, const void *input,
                          size_t input_len);
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
                            size_t out_len);
// Like finalize, with an additional `seek` offset argument.
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
                                 uint8_t *out, size_t out_len);
// Flag bits passed to the compression function; each constant is a distinct
// single bit so they can be OR-ed together.
enum blake3_flags {
  CHUNK_START         = 0x01,
  CHUNK_END           = 0x02,
  PARENT              = 0x04,
  ROOT                = 0x08,
  KEYED_HASH          = 0x10,
  DERIVE_KEY_CONTEXT  = 0x20,
  DERIVE_KEY_MATERIAL = 0x40,
};
// INLINE: file-local, forcibly inlined helper (compiler-specific spelling).
#if defined(_MSC_VER)
#define INLINE static __forceinline
#else
#define INLINE static inline __attribute__((always_inline))
#endif
// Architecture detection: IS_X86 is set for both 32- and 64-bit x86, plus
// IS_X86_64 or IS_X86_32 for the specific width.
#if defined(__x86_64__) || defined(_M_X64)
#define IS_X86
#define IS_X86_64
#endif
#if defined(__i386__) || defined(_M_IX86)
#define IS_X86
#define IS_X86_32
#endif
#if defined(IS_X86)
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#include <immintrin.h>
#endif
// Upper bound used to size the per-batch pointer and chaining-value arrays:
// 16 on x86, 4 with NEON, 1 otherwise.
#if defined(IS_X86)
#define MAX_SIMD_DEGREE 16
#elif defined(BLAKE3_USE_NEON)
#define MAX_SIMD_DEGREE 4
#else
#define MAX_SIMD_DEGREE 1
#endif
// Same bound, but never smaller than 2.
#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
// Initialization vector: eight 32-bit constant words.
static const uint32_t IV[8] = {
    0x6A09E667u,
    0xBB67AE85u,
    0x3C6EF372u,
    0xA54FF53Au,
    0x510E527Fu,
    0x9B05688Cu,
    0x1F83D9ABu,
    0x5BE0CD19u,
};
// Message word schedule: 7 rows of 16 indices, each row a permutation of
// 0..15 (row 0 is the identity).
// NOTE(review): presumably row r gives the order in which the sixteen
// message words are consumed in round r - confirm against the round function.
static const uint8_t MSG_SCHEDULE[7][16] = {
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
    {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
    {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
    {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
    {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
    {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
};
// Index (0..63) of the most significant set bit of `x`.
// `x` must be non-zero for the intrinsic-based variants; the only caller in
// this file passes `x | 1`.
static unsigned int highest_one(uint64_t x) {
#if defined(__GNUC__) || defined(__clang__)
  // clz is in 0..63 for non-zero x, so 63 - clz is the bit index.
  return (unsigned int)(63 - __builtin_clzll(x));
#elif defined(_MSC_VER) && defined(IS_X86_64)
  unsigned long bit;
  _BitScanReverse64(&bit, x);
  return bit;
#elif defined(_MSC_VER) && defined(IS_X86_32)
  // No 64-bit scan on 32-bit targets: look at the upper half first.
  unsigned long bit;
  if (x >> 32) {
    _BitScanReverse(&bit, x >> 32);
    return 32 + bit;
  }
  _BitScanReverse(&bit, x);
  return bit;
#else
  // Binary search: at each step, if anything is set above the lower `width`
  // bits, discard those lower bits and remember how far we moved.
  unsigned int pos = 0;
  unsigned int width;
  for (width = 32; width != 0; width /= 2) {
    if ((x >> width) != 0) {
      x >>= width;
      pos += width;
    }
  }
  return pos;
#endif
}
// Number of 1-bits in `x`.
INLINE unsigned int popcnt(uint64_t x) {
#if defined(__GNUC__) || defined(__clang__)
  return (unsigned int)__builtin_popcountll(x);
#else
  // Each iteration clears the lowest set bit.
  unsigned int bits = 0;
  for (; x != 0; x &= x - 1) {
    bits += 1;
  }
  return bits;
#endif
}
// Largest power of two that is <= x. An input of 0 yields 1, because the
// lowest bit is forced on before the highest set bit is located.
INLINE uint64_t round_down_to_power_of_2(uint64_t x) {
  return 1ULL << highest_one(x | 1);
}
// Low 32 bits of the 64-bit counter.
INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; }
// High 32 bits of the 64-bit counter.
INLINE uint32_t counter_high(uint64_t counter) {
  return (uint32_t)(counter >> 32);
}
// Read a 32-bit little-endian word from `src`, byte by byte, so the result
// does not depend on host endianness or on the alignment of `src`.
INLINE uint32_t load32(const void *src) {
  const uint8_t *bytes = (const uint8_t *)src;
  uint32_t word = 0;
  // Start with the most significant byte (offset 3) and shift it upward.
  for (int i = 3; i >= 0; i--) {
    word = (word << 8) | (uint32_t)bytes[i];
  }
  return word;
}
// Convert a key given as bytes into eight little-endian 32-bit words.
INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
                           uint32_t key_words[8]) {
  for (size_t i = 0; i < 8; i++) {
    key_words[i] = load32(&key[i * 4]);
  }
}
// Write `w` to `dst` as four bytes in little-endian order.
INLINE void store32(void *dst, uint32_t w) {
  uint8_t *bytes = (uint8_t *)dst;
  for (unsigned int i = 0; i < 4; i++) {
    bytes[i] = (uint8_t)(w >> (8 * i));
  }
}
// Serialize eight 32-bit chaining-value words into 32 little-endian bytes.
INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
  for (size_t i = 0; i < 8; i++) {
    store32(&bytes_out[i * 4], cv_words[i]);
  }
}
// Dispatching entry points: compress one block in place, compress one block
// to a 64-byte output, and hash many equally sized inputs.
void blake3_compress_in_place(uint32_t cv[8],
                              const uint8_t block[BLAKE3_BLOCK_LEN],
                              uint8_t block_len, uint64_t counter,
                              uint8_t flags);
void blake3_compress_xof(const uint32_t cv[8],
                         const uint8_t block[BLAKE3_BLOCK_LEN],
                         uint8_t block_len, uint64_t counter, uint8_t flags,
                         uint8_t out[64]);
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
                      size_t blocks, const uint32_t key[8], uint64_t counter,
                      bool increment_counter, uint8_t flags,
                      uint8_t flags_start, uint8_t flags_end, uint8_t *out);
size_t blake3_simd_degree(void);
// Portable C implementations with the same signatures as the entry points.
void blake3_compress_in_place_portable(uint32_t cv[8],
                                       const uint8_t block[BLAKE3_BLOCK_LEN],
                                       uint8_t block_len, uint64_t counter,
                                       uint8_t flags);
void blake3_compress_xof_portable(const uint32_t cv[8],
                                  const uint8_t block[BLAKE3_BLOCK_LEN],
                                  uint8_t block_len, uint64_t counter,
                                  uint8_t flags, uint8_t out[64]);
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
                               size_t blocks, const uint32_t key[8],
                               uint64_t counter, bool increment_counter,
                               uint8_t flags, uint8_t flags_start,
                               uint8_t flags_end, uint8_t *out);
// x86 SIMD implementations, declared with C linkage. They are not defined in
// this part of the file (presumably provided by separately built objects -
// confirm). Each instruction-set tier can be compiled out with the matching
// BLAKE3_NO_* macro.
#if defined(IS_X86)
#if !defined(BLAKE3_NO_SSE2)
extern "C" void blake3_compress_in_place_sse2(uint32_t cv[8],
                                   const uint8_t block[BLAKE3_BLOCK_LEN],
                                   uint8_t block_len, uint64_t counter,
                                   uint8_t flags);
extern "C" void blake3_compress_xof_sse2(const uint32_t cv[8],
                              const uint8_t block[BLAKE3_BLOCK_LEN],
                              uint8_t block_len, uint64_t counter,
                              uint8_t flags, uint8_t out[64]);
extern "C" void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
                           size_t blocks, const uint32_t key[8],
                           uint64_t counter, bool increment_counter,
                           uint8_t flags, uint8_t flags_start,
                           uint8_t flags_end, uint8_t *out);
#endif
#if !defined(BLAKE3_NO_SSE41)
extern "C" void blake3_compress_in_place_sse41(uint32_t cv[8],
                                    const uint8_t block[BLAKE3_BLOCK_LEN],
                                    uint8_t block_len, uint64_t counter,
                                    uint8_t flags);
extern "C" void blake3_compress_xof_sse41(const uint32_t cv[8],
                               const uint8_t block[BLAKE3_BLOCK_LEN],
                               uint8_t block_len, uint64_t counter,
                               uint8_t flags, uint8_t out[64]);
extern "C" void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
                            size_t blocks, const uint32_t key[8],
                            uint64_t counter, bool increment_counter,
                            uint8_t flags, uint8_t flags_start,
                            uint8_t flags_end, uint8_t *out);
#endif
// AVX2 only provides the many-inputs routine.
#if !defined(BLAKE3_NO_AVX2)
extern "C" void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
                           size_t blocks, const uint32_t key[8],
                           uint64_t counter, bool increment_counter,
                           uint8_t flags, uint8_t flags_start,
                           uint8_t flags_end, uint8_t *out);
#endif
#if !defined(BLAKE3_NO_AVX512)
extern "C" void blake3_compress_in_place_avx512(uint32_t cv[8],
                                     const uint8_t block[BLAKE3_BLOCK_LEN],
                                     uint8_t block_len, uint64_t counter,
                                     uint8_t flags);
extern "C" void blake3_compress_xof_avx512(const uint32_t cv[8],
                                const uint8_t block[BLAKE3_BLOCK_LEN],
                                uint8_t block_len, uint64_t counter,
                                uint8_t flags, uint8_t out[64]);
extern "C" void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
                             size_t blocks, const uint32_t key[8],
                             uint64_t counter, bool increment_counter,
                             uint8_t flags, uint8_t flags_start,
                             uint8_t flags_end, uint8_t *out);
#endif
#endif
// NEON many-inputs routine, only declared when BLAKE3_USE_NEON is defined.
#if defined(BLAKE3_USE_NEON)
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
                           size_t blocks, const uint32_t key[8],
                           uint64_t counter, bool increment_counter,
                           uint8_t flags, uint8_t flags_start,
                           uint8_t flags_end, uint8_t *out);
#endif
#if defined(IS_X86)
#if defined(_MSC_VER)
#include <intrin.h>
#elif defined(__GNUC__)
#include <immintrin.h>
#else
#error "Unimplemented!"
#endif
#endif
// Evaluate `x` and discard the result (cast to void); used to mark a value
// as intentionally unused.
#define MAYBE_UNUSED(x) (void)((x))
#if defined(IS_X86)
// Execute XGETBV with ECX = 0 and return the 64-bit result (EDX:EAX).
// MSVC uses the _xgetbv intrinsic; other compilers use inline assembly.
static uint64_t xgetbv() {
#if defined(_MSC_VER)
  return _xgetbv(0);
#else
  uint32_t eax = 0, edx = 0;
  __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
  return ((uint64_t)edx << 32) | eax;
#endif
}
// Execute CPUID for leaf `id` and store EAX, EBX, ECX, EDX in out[0..3].
static void cpuid(uint32_t out[4], uint32_t id) {
#if defined(_MSC_VER)
  __cpuid((int *)out, id);
#elif defined(__i386__) || defined(_M_IX86)
  // 32-bit x86: EBX is not named as an output. Its old value is parked in a
  // scratch register and swapped back after CPUID, which leaves CPUID's EBX
  // in the scratch register (presumably because EBX may be reserved, e.g.
  // for PIC - confirm).
  __asm__ __volatile__("movl %%ebx, %1\n"
                       "cpuid\n"
                       "xchgl %1, %%ebx\n"
                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id));
#endif
}
// Execute CPUID for leaf `id`, sub-leaf `sid` (passed in ECX) and store
// EAX, EBX, ECX, EDX in out[0..3].
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
#if defined(_MSC_VER)
  __cpuidex((int *)out, id, sid);
#elif defined(__i386__) || defined(_M_IX86)
  // Same EBX save/restore sequence as in cpuid() for 32-bit x86.
  __asm__ __volatile__("movl %%ebx, %1\n"
                       "cpuid\n"
                       "xchgl %1, %%ebx\n"
                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id), "c"(sid));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id), "c"(sid));
#endif
}
#endif
// CPU feature bits used by the dispatch code.
//
// With FRANZFIX defined (the default, set right here) the bits are plain
// macros and the feature mask is held in a uint32_t; without it the original
// `enum cpu_feature` is used. The macros live inside the FRANZFIX branch so
// that undefining FRANZFIX does not make them collide with the enumerator
// names below.
#define FRANZFIX
#ifdef FRANZFIX
#define SSE2      1
#define SSSE3     2
#define SSE41     4
#define AVX       8
#define AVX2      16
#define AVX512F   32
#define AVX512VL  64
// Sentinel for "features not detected yet". Parenthesized so the macro
// expands safely inside larger expressions (e.g. UNDEFINED + 1).
#define UNDEFINED (1 << 30)
#else
enum cpu_feature {
  SSE2 = 1 << 0,
  SSSE3 = 1 << 1,
  SSE41 = 1 << 2,
  AVX = 1 << 3,
  AVX2 = 1 << 4,
  AVX512F = 1 << 5,
  AVX512VL = 1 << 6,
  UNDEFINED = 1 << 30
};
#endif
// Cached CPU feature mask. Starts as UNDEFINED and is filled in by the first
// successful detection in get_cpu_features(). File-local unless
// BLAKE3_TESTING is defined.
#if !defined(BLAKE3_TESTING)
static
#endif
#ifdef FRANZFIX
    uint32_t g_cpu_features = UNDEFINED;
#else
    enum cpu_feature g_cpu_features = UNDEFINED;
#endif
// Return the mask of supported SIMD features (SSE2, SSSE3, SSE41, AVX, AVX2,
// AVX512F, AVX512VL).
//
// On x86 the CPU is queried once through CPUID / XGETBV and the result is
// cached in g_cpu_features; later calls return the cached value. On other
// architectures the function returns 0 and nothing is cached.
// When `flagdebug3` is set, the detected mask is printed once on detection.
#if !defined(BLAKE3_TESTING)
static
#endif
#ifdef FRANZFIX
    uint32_t
#else
    enum cpu_feature
#endif
    get_cpu_features() {
  if (g_cpu_features != UNDEFINED) {
    return g_cpu_features;
  } else {
#if defined(IS_X86)
    uint32_t regs[4] = {0};
    uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
    (void)edx;  // edx is not read on x86-64 builds
#ifdef FRANZFIX
    uint32_t features = 0;
#else
    enum cpu_feature features = 0;
#endif
    // Leaf 0: EAX holds the highest supported standard leaf.
    cpuid(regs, 0);
    const int max_id = *eax;
    // Leaf 1: basic feature bits in ECX / EDX.
    cpuid(regs, 1);
#if defined(__amd64__) || defined(_M_X64)
    // SSE2 is taken as always present on x86-64.
    features |= SSE2;
#else
    if (*edx & (1UL << 26))
      features |= SSE2;
#endif
    // SSSE3 is CPUID.1:ECX bit 9. Bit 0, tested previously, is SSE3, so CPUs
    // with SSE3 but without SSSE3 were misreported.
    if (*ecx & (1UL << 9))
      features |= SSSE3;
    if (*ecx & (1UL << 19))
      features |= SSE41;
    // Bit 27 (OSXSAVE): XGETBV may be used to see which register state the
    // OS has enabled.
    if (*ecx & (1UL << 27)) {
      const uint64_t mask = xgetbv();
      // Bits 1 and 2: SSE and AVX register state enabled.
      if ((mask & 6) == 6) {
        if (*ecx & (1UL << 28))
          features |= AVX;
        if (max_id >= 7) {
          // Leaf 7, sub-leaf 0: extended feature bits in EBX.
          cpuidex(regs, 7, 0);
          if (*ebx & (1UL << 5))
            features |= AVX2;
          // Bits 5..7: AVX-512 register state enabled.
          if ((mask & 224) == 224) {
            if (*ebx & (1UL << 31))
              features |= AVX512VL;
            if (*ebx & (1UL << 16))
              features |= AVX512F;
          }
        }
      }
    }
    g_cpu_features = features;
    if (flagdebug3)
      myprintf("00009: CPU feature %04X\n",features);
    return features;
#else
    return 0;
#endif
  }
}
/*
 * Compress one block into `cv` in place, using the first available
 * implementation in the order AVX-512 (needs AVX512VL), SSE4.1, SSE2, and
 * finally the portable C code. Paths disabled with BLAKE3_NO_* macros, or
 * everything x86-specific when IS_X86 is not defined, are compiled out.
 */
void blake3_compress_in_place(uint32_t cv[8],
                              const uint8_t block[BLAKE3_BLOCK_LEN],
                              uint8_t block_len, uint64_t counter,
                              uint8_t flags) {
#if defined(IS_X86)
#ifdef FRANZFIX
  uint32_t cpu = get_cpu_features();
#else
  const enum cpu_feature cpu = get_cpu_features();
#endif
  MAYBE_UNUSED(cpu);
#if !defined(BLAKE3_NO_AVX512)
  if ((cpu & AVX512VL) != 0) {
    blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
    return;
  }
#endif
#if !defined(BLAKE3_NO_SSE41)
  if ((cpu & SSE41) != 0) {
    blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
    return;
  }
#endif
#if !defined(BLAKE3_NO_SSE2)
  if ((cpu & SSE2) != 0) {
    blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);
    return;
  }
#endif
#endif
  /* No SIMD path taken: fall back to plain C. */
  blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
}
/*
 * Compress one block and write the 64-byte extended output to `out`, using
 * the first available implementation in the order AVX-512 (needs AVX512VL),
 * SSE4.1, SSE2, and finally the portable C code.
 */
void blake3_compress_xof(const uint32_t cv[8],
                         const uint8_t block[BLAKE3_BLOCK_LEN],
                         uint8_t block_len, uint64_t counter, uint8_t flags,
                         uint8_t out[64]) {
#if defined(IS_X86)
#ifdef FRANZFIX
  const uint32_t cpu = get_cpu_features();
#else
  const enum cpu_feature cpu = get_cpu_features();
#endif
  MAYBE_UNUSED(cpu);
#if !defined(BLAKE3_NO_AVX512)
  if ((cpu & AVX512VL) != 0) {
    blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
    return;
  }
#endif
#if !defined(BLAKE3_NO_SSE41)
  if ((cpu & SSE41) != 0) {
    blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
    return;
  }
#endif
#if !defined(BLAKE3_NO_SSE2)
  if ((cpu & SSE2) != 0) {
    blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);
    return;
  }
#endif
#endif
  /* No SIMD path taken: fall back to plain C. */
  blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
}
/*
 * Hash `num_inputs` inputs of `blocks` blocks each, writing the results
 * consecutively to `out`. The work is forwarded unchanged to the first
 * available backend: AVX-512 (needs both AVX512F and AVX512VL), AVX2,
 * SSE4.1, SSE2, then NEON when BLAKE3_USE_NEON is defined, otherwise the
 * portable C implementation.
 */
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
                      size_t blocks, const uint32_t key[8], uint64_t counter,
                      bool increment_counter, uint8_t flags,
                      uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
#if defined(IS_X86)
#ifdef FRANZFIX
  const uint32_t cpu = get_cpu_features();
#else
  const enum cpu_feature cpu = get_cpu_features();
#endif
  MAYBE_UNUSED(cpu);
#if !defined(BLAKE3_NO_AVX512)
  if ((cpu & AVX512F) && (cpu & AVX512VL)) {
    blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
                            increment_counter, flags, flags_start, flags_end,
                            out);
    return;
  }
#endif
#if !defined(BLAKE3_NO_AVX2)
  if ((cpu & AVX2) != 0) {
    blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
                          increment_counter, flags, flags_start, flags_end,
                          out);
    return;
  }
#endif
#if !defined(BLAKE3_NO_SSE41)
  if ((cpu & SSE41) != 0) {
    blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
                           increment_counter, flags, flags_start, flags_end,
                           out);
    return;
  }
#endif
#if !defined(BLAKE3_NO_SSE2)
  if ((cpu & SSE2) != 0) {
    blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
                          increment_counter, flags, flags_start, flags_end,
                          out);
    return;
  }
#endif
#endif
#if defined(BLAKE3_USE_NEON)
  blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
                        increment_counter, flags, flags_start, flags_end, out);
#else
  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
                            increment_counter, flags, flags_start, flags_end,
                            out);
#endif
}
/*
 * Report how many inputs the selected hash_many backend handles at once:
 * 16 for AVX-512 (AVX512F and AVX512VL both present), 8 for AVX2, 4 for
 * SSE4.1 or SSE2, 4 for NEON builds, and 1 for the portable code.
 */
size_t blake3_simd_degree(void) {
#if defined(IS_X86)
#ifdef FRANZFIX
  uint32_t cpu = get_cpu_features();
#else
  const enum cpu_feature cpu = get_cpu_features();
#endif
  MAYBE_UNUSED(cpu);
#if !defined(BLAKE3_NO_AVX512)
  if ((cpu & AVX512F) && (cpu & AVX512VL)) {
    return 16;
  }
#endif
#if !defined(BLAKE3_NO_AVX2)
  if ((cpu & AVX2) != 0) {
    return 8;
  }
#endif
#if !defined(BLAKE3_NO_SSE41)
  if ((cpu & SSE41) != 0) {
    return 4;
  }
#endif
#if !defined(BLAKE3_NO_SSE2)
  if ((cpu & SSE2) != 0) {
    return 4;
  }
#endif
#endif
#if defined(BLAKE3_USE_NEON)
  return 4;
#else
  return 1;
#endif
}
/* Rotate the 32-bit word `w` right by `c` bit positions. */
INLINE uint32_t rotr32(uint32_t w, uint32_t c) {
  const uint32_t shifted_down = w >> c;
  const uint32_t wrapped_up = w << (32 - c);
  return shifted_down | wrapped_up;
}
/*
 * Quarter-round mixing step: updates state[a], state[b], state[c] and
 * state[d] in place with two add/xor/rotate passes (rotations 16, 12, then
 * 8, 7), injecting message word `x` in the first pass and `y` in the second.
 */
INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
              uint32_t x, uint32_t y) {
  /* First pass: message word x. */
  state[a] += state[b] + x;
  state[d] = rotr32(state[a] ^ state[d], 16);
  state[c] += state[d];
  state[b] = rotr32(state[c] ^ state[b], 12);
  /* Second pass: message word y. */
  state[a] += state[b] + y;
  state[d] = rotr32(state[a] ^ state[d], 8);
  state[c] += state[d];
  state[b] = rotr32(state[c] ^ state[b], 7);
}
/*
 * One full round: four g() calls over the columns of the 4x4 state followed
 * by four over its diagonals. Message words are picked through the
 * MSG_SCHEDULE row for `round`.
 */
INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
  const uint8_t *const order = MSG_SCHEDULE[round];
  /* Columns: (i, 4+i, 8+i, 12+i), schedule entries 0..7. */
  for (size_t i = 0; i < 4; i++) {
    g(state, i, 4 + i, 8 + i, 12 + i, msg[order[2 * i]],
      msg[order[2 * i + 1]]);
  }
  /* Diagonals: (i, 4+(i+1)%4, 8+(i+2)%4, 12+(i+3)%4), schedule entries 8..15. */
  for (size_t i = 0; i < 4; i++) {
    g(state, i, 4 + ((i + 1) % 4), 8 + ((i + 2) % 4), 12 + ((i + 3) % 4),
      msg[order[8 + 2 * i]], msg[order[9 + 2 * i]]);
  }
}
/*
 * Build the 16-word compression state and run the seven rounds over it.
 * On return `state` holds the post-round words; callers apply the final
 * feed-forward XORs themselves.
 *
 * State layout before the rounds:
 *   [0..7]  = cv[0..7]
 *   [8..11] = IV[0..3]
 *   [12]    = counter_low(counter), [13] = counter_high(counter)
 *   [14]    = block_len,            [15] = flags
 */
INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],
                         const uint8_t block[BLAKE3_BLOCK_LEN],
                         uint8_t block_len, uint64_t counter, uint8_t flags) {
  /* Decode the block into sixteen 32-bit message words. */
  uint32_t m[16];
  for (size_t i = 0; i < 16; i++) {
    m[i] = load32(block + 4 * i);
  }

  for (size_t i = 0; i < 8; i++) {
    state[i] = cv[i];
  }
  for (size_t i = 0; i < 4; i++) {
    state[8 + i] = IV[i];
  }
  state[12] = counter_low(counter);
  state[13] = counter_high(counter);
  state[14] = (uint32_t)block_len;
  state[15] = (uint32_t)flags;

  for (size_t r = 0; r < 7; r++) {
    round_fn(state, &m[0], r);
  }
}
/*
 * Portable compression: runs compress_pre() and overwrites cv[i] with
 * state[i] ^ state[i + 8] for i = 0..7.
 */
void blake3_compress_in_place_portable(uint32_t cv[8],
                                       const uint8_t block[BLAKE3_BLOCK_LEN],
                                       uint8_t block_len, uint64_t counter,
                                       uint8_t flags) {
  uint32_t v[16];
  compress_pre(v, cv, block, block_len, counter, flags);
  for (size_t i = 0; i < 8; i++) {
    cv[i] = v[i] ^ v[i + 8];
  }
}
/*
 * Portable extended-output compression: runs compress_pre() and stores
 * sixteen 32-bit words to `out` via store32(). Words 0..7 are
 * state[i] ^ state[i + 8]; words 8..15 are state[i + 8] ^ cv[i].
 */
void blake3_compress_xof_portable(const uint32_t cv[8],
                                  const uint8_t block[BLAKE3_BLOCK_LEN],
                                  uint8_t block_len, uint64_t counter,
                                  uint8_t flags, uint8_t out[64]) {
  uint32_t v[16];
  compress_pre(v, cv, block, block_len, counter, flags);
  /* Lower half of the output. */
  for (size_t i = 0; i < 8; i++) {
    store32(&out[i * 4], v[i] ^ v[i + 8]);
  }
  /* Upper half, fed forward with the input chaining value. */
  for (size_t i = 0; i < 8; i++) {
    store32(&out[(i + 8) * 4], v[i + 8] ^ cv[i]);
  }
}
/*
 * Hash a single input of `blocks` full blocks, starting from `key`, and
 * write the resulting chaining value to `out`. The first block gets
 * `flags | flags_start`, later blocks plain `flags`, and the last block
 * additionally `flags_end`.
 */
INLINE void hash_one_portable(const uint8_t *input, size_t blocks,
                              const uint32_t key[8], uint64_t counter,
                              uint8_t flags, uint8_t flags_start,
                              uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
  uint32_t cv[8];
  memcpy(cv, key, BLAKE3_KEY_LEN);
  uint8_t current_flags = flags | flags_start;
  for (; blocks > 0; blocks -= 1) {
    if (blocks == 1) {
      current_flags |= flags_end;
    }
    blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
                                      current_flags);
    input += BLAKE3_BLOCK_LEN;
    /* Only the first block carries flags_start. */
    current_flags = flags;
  }
  store_cv_words(out, cv);
}
/*
 * Portable hash_many: hashes each of the `num_inputs` inputs in turn with
 * hash_one_portable(), writing BLAKE3_OUT_LEN bytes per input to `out`.
 * When `increment_counter` is set the counter grows by one per input.
 */
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
                               size_t blocks, const uint32_t key[8],
                               uint64_t counter, bool increment_counter,
                               uint8_t flags, uint8_t flags_start,
                               uint8_t flags_end, uint8_t *out) {
  for (size_t i = 0; i < num_inputs; i++) {
    hash_one_portable(inputs[i], blocks, key, counter, flags, flags_start,
                      flags_end, out + i * BLAKE3_OUT_LEN);
    if (increment_counter) {
      counter += 1;
    }
  }
}
/*
 * Initialise a chunk state: chaining value = key, chunk counter 0, empty
 * zeroed buffer, no blocks compressed, and the given flags.
 */
INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
                             uint8_t flags) {
  self->flags = flags;
  self->chunk_counter = 0;
  self->blocks_compressed = 0;
  self->buf_len = 0;
  memset(self->buf, 0, BLAKE3_BLOCK_LEN);
  memcpy(self->cv, key, BLAKE3_KEY_LEN);
}
/*
 * Reset a chunk state for the chunk numbered `chunk_counter`: chaining value
 * back to `key`, empty zeroed buffer, no blocks compressed. The flags field
 * is left untouched.
 */
INLINE void chunk_state_reset(blake3_chunk_state *self, const uint32_t key[8],
                              uint64_t chunk_counter) {
  self->chunk_counter = chunk_counter;
  self->blocks_compressed = 0;
  self->buf_len = 0;
  memset(self->buf, 0, BLAKE3_BLOCK_LEN);
  memcpy(self->cv, key, BLAKE3_KEY_LEN);
}
/* Number of bytes absorbed by this chunk so far: compressed blocks plus
 * whatever is waiting in the buffer. */
INLINE size_t chunk_state_len(const blake3_chunk_state *self) {
  const size_t compressed_bytes =
      BLAKE3_BLOCK_LEN * (size_t)self->blocks_compressed;
  const size_t buffered_bytes = (size_t)self->buf_len;
  return compressed_bytes + buffered_bytes;
}
/*
 * Copy as many input bytes as fit into the remaining space of the block
 * buffer (at most `input_len`), advance buf_len, and return the number of
 * bytes copied.
 */
INLINE size_t chunk_state_fill_buf(blake3_chunk_state *self,
                                   const uint8_t *input, size_t input_len) {
  const size_t used = (size_t)self->buf_len;
  const size_t room = BLAKE3_BLOCK_LEN - used;
  const size_t take = (room > input_len) ? input_len : room;
  memcpy(self->buf + used, input, take);
  self->buf_len += (uint8_t)take;
  return take;
}
/* CHUNK_START while no block of this chunk has been compressed yet,
 * otherwise 0. */
INLINE uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state *self) {
  return (self->blocks_compressed == 0) ? CHUNK_START : 0;
}
/*
 * Captured inputs of one pending compression, as assembled by make_output().
 * output_chaining_value() and output_root_bytes() pass these fields to the
 * compression functions.
 */
typedef struct {
  uint32_t input_cv[8];            /* chaining value fed into the compression */
  uint64_t counter;                /* counter argument stored by make_output() */
  uint8_t block[BLAKE3_BLOCK_LEN]; /* copy of the block to compress */
  uint8_t block_len;               /* block length argument */
  uint8_t flags;                   /* flags argument (output_root_bytes() ORs in ROOT) */
} output_t;
/* Bundle the arguments of a pending compression into an output_t, copying
 * the chaining value and the block by value. */
INLINE output_t make_output(const uint32_t input_cv[8],
                            const uint8_t block[BLAKE3_BLOCK_LEN],
                            uint8_t block_len, uint64_t counter,
                            uint8_t flags) {
  output_t result;
  result.counter = counter;
  result.block_len = block_len;
  result.flags = flags;
  memcpy(result.block, block, BLAKE3_BLOCK_LEN);
  memcpy(result.input_cv, input_cv, 32);
  return result;
}
/* Run the captured compression on a private copy of the input chaining
 * value and serialise the resulting 8 words into `cv`. */
INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
  uint32_t words[8];
  memcpy(words, self->input_cv, 32);
  blake3_compress_in_place(words, self->block, self->block_len, self->counter,
                           self->flags);
  store_cv_words(cv, words);
}
/*
 * Produce `out_len` bytes of root output starting at byte offset `seek`.
 * Output is generated in 64-byte blocks with blake3_compress_xof(), using
 * the block index as the counter and the ROOT flag set; the first block may
 * be entered part-way through (seek % 64).
 */
INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
                              size_t out_len) {
  uint64_t block_index = seek / 64;
  size_t skip = seek % 64;
  uint8_t wide_buf[64];
  while (out_len > 0) {
    blake3_compress_xof(self->input_cv, self->block, self->block_len,
                        block_index, self->flags | ROOT, wide_buf);
    const size_t available = 64 - skip;
    const size_t chunk = (out_len > available) ? available : out_len;
    memcpy(out, wide_buf + skip, chunk);
    out += chunk;
    out_len -= chunk;
    /* Every block after the first is consumed from its start. */
    block_index += 1;
    skip = 0;
  }
}
/*
 * Absorb `input_len` bytes into the chunk state.
 *
 * A partially filled buffer is topped up first and compressed only if more
 * input follows. Whole blocks are then compressed straight from the input
 * while strictly more than one block remains, and the rest (up to one full
 * block) is left in the buffer.
 */
INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
                               size_t input_len) {
  if (self->buf_len > 0) {
    const size_t copied = chunk_state_fill_buf(self, input, input_len);
    input += copied;
    input_len -= copied;
    if (input_len > 0) {
      const uint8_t start_flag = chunk_state_maybe_start_flag(self);
      blake3_compress_in_place(self->cv, self->buf, BLAKE3_BLOCK_LEN,
                               self->chunk_counter, self->flags | start_flag);
      self->blocks_compressed += 1;
      self->buf_len = 0;
      memset(self->buf, 0, BLAKE3_BLOCK_LEN);
    }
  }

  for (; input_len > BLAKE3_BLOCK_LEN;
       input += BLAKE3_BLOCK_LEN, input_len -= BLAKE3_BLOCK_LEN) {
    const uint8_t start_flag = chunk_state_maybe_start_flag(self);
    blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN,
                             self->chunk_counter, self->flags | start_flag);
    self->blocks_compressed += 1;
  }

  /* Whatever is left stays buffered. */
  (void)chunk_state_fill_buf(self, input, input_len);
}
/* Capture the final compression of this chunk: the buffered block with the
 * state's flags, CHUNK_END, and CHUNK_START if nothing was compressed yet. */
INLINE output_t chunk_state_output(const blake3_chunk_state *self) {
  const uint8_t final_flags =
      self->flags | chunk_state_maybe_start_flag(self) | CHUNK_END;
  return make_output(self->cv, self->buf, self->buf_len, self->chunk_counter,
                     final_flags);
}
/* Capture a parent-node compression: a full block, counter 0, chaining
 * value `key`, and the PARENT flag added to `flags`. */
INLINE output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN],
                              const uint32_t key[8], uint8_t flags) {
  const uint8_t parent_flags = flags | PARENT;
  return make_output(key, block, BLAKE3_BLOCK_LEN, 0, parent_flags);
}
/* Byte length of the left part of a split: the number of chunks in
 * (content_len - 1) bytes, passed through round_down_to_power_of_2(), times
 * the chunk length. */
INLINE size_t left_len(size_t content_len) {
  const size_t chunks_before_last = (content_len - 1) / BLAKE3_CHUNK_LEN;
  const size_t left_chunks = round_down_to_power_of_2(chunks_before_last);
  return left_chunks * BLAKE3_CHUNK_LEN;
}
/*
 * Hash every whole chunk of `input` in one blake3_hash_many() call, then
 * hash a trailing partial chunk (if any) on its own. Chaining values are
 * written consecutively to `out`; the return value is how many were written.
 */
INLINE size_t compress_chunks_parallel(const uint8_t *input, size_t input_len,
                                       const uint32_t key[8],
                                       uint64_t chunk_counter, uint8_t flags,
                                       uint8_t *out) {
#if defined(BLAKE3_TESTING)
  assert(0 < input_len);
  assert(input_len <= MAX_SIMD_DEGREE * BLAKE3_CHUNK_LEN);
#endif
  const uint8_t *whole_chunks[MAX_SIMD_DEGREE];
  size_t n_whole = 0;
  size_t consumed = 0;
  for (; input_len - consumed >= BLAKE3_CHUNK_LEN;
       consumed += BLAKE3_CHUNK_LEN) {
    whole_chunks[n_whole] = input + consumed;
    n_whole += 1;
  }
  blake3_hash_many(whole_chunks, n_whole, BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN,
                   key, chunk_counter, true, flags, CHUNK_START, CHUNK_END,
                   out);

  if (input_len <= consumed) {
    return n_whole;
  }

  /* Trailing partial chunk: its counter follows the whole chunks. */
  blake3_chunk_state tail;
  chunk_state_init(&tail, key, flags);
  tail.chunk_counter = chunk_counter + (uint64_t)n_whole;
  chunk_state_update(&tail, input + consumed, input_len - consumed);
  output_t tail_output = chunk_state_output(&tail);
  output_chaining_value(&tail_output, out + n_whole * BLAKE3_OUT_LEN);
  return n_whole + 1;
}
/*
 * Combine adjacent pairs of child chaining values into parent chaining
 * values with one blake3_hash_many() call (PARENT flag, counter 0). An odd
 * child left over at the end is copied to `out` unchanged. Returns the
 * number of chaining values written to `out`.
 */
INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
                                        size_t num_chaining_values,
                                        const uint32_t key[8], uint8_t flags,
                                        uint8_t *out) {
#if defined(BLAKE3_TESTING)
  assert(2 <= num_chaining_values);
  assert(num_chaining_values <= 2 * MAX_SIMD_DEGREE_OR_2);
#endif
  const uint8_t *pairs[MAX_SIMD_DEGREE_OR_2];
  const size_t n_pairs = num_chaining_values / 2;
  for (size_t i = 0; i < n_pairs; i++) {
    /* Each parent block is two consecutive child chaining values. */
    pairs[i] = child_chaining_values + 2 * i * BLAKE3_OUT_LEN;
  }
  blake3_hash_many(pairs, n_pairs, 1, key, 0, false, flags | PARENT, 0, 0,
                   out);

  if (num_chaining_values <= 2 * n_pairs) {
    return n_pairs;
  }
  /* Odd child out: pass it through untouched. */
  memcpy(out + n_pairs * BLAKE3_OUT_LEN,
         child_chaining_values + 2 * n_pairs * BLAKE3_OUT_LEN,
         BLAKE3_OUT_LEN);
  return n_pairs + 1;
}
/*
 * Recursively hash `input` and write the resulting chaining values to `out`,
 * returning how many were written.
 *
 * Inputs that fit in blake3_simd_degree() chunks go straight to
 * compress_chunks_parallel(). Larger inputs are split at left_len(); both
 * halves are hashed recursively into a local array and the results are then
 * merged one level with compress_parents_parallel().
 */
static size_t blake3_compress_subtree_wide(const uint8_t *input,
                                           size_t input_len,
                                           const uint32_t key[8],
                                           uint64_t chunk_counter,
                                           uint8_t flags, uint8_t *out) {
  /* Base case: small enough for a single parallel chunk pass. */
  if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) {
    return compress_chunks_parallel(input, input_len, key, chunk_counter, flags,
                                    out);
  }
  /* Split point and the chunk counter at which the right half starts. */
  size_t left_input_len = left_len(input_len);
  size_t right_input_len = input_len - left_input_len;
  const uint8_t *right_input = &input[left_input_len];
  uint64_t right_chunk_counter =
      chunk_counter + (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN);
  uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
  size_t degree = blake3_simd_degree();
  /* With a SIMD degree of 1 and a left half longer than one chunk, reserve
   * room for two chaining values before the right half's results. */
  if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {
    degree = 2;
  }
  /* Right-half results are written `degree` chaining values into cv_array. */
  uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];
  size_t left_n = blake3_compress_subtree_wide(input, left_input_len, key,
                                               chunk_counter, flags, cv_array);
  size_t right_n = blake3_compress_subtree_wide(
      right_input, right_input_len, key, right_chunk_counter, flags, right_cvs);
  /* A single left chaining value: copy the first two chaining values of
   * cv_array to the caller without merging them. */
  if (left_n == 1) {
    memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
    return 2;
  }
  /* Otherwise merge all collected chaining values by one parent level. */
  size_t num_chaining_values = left_n + right_n;
  return compress_parents_parallel(cv_array, num_chaining_values, key, flags,
                                   out);
}
/*
 * Hash `input` with blake3_compress_subtree_wide() and keep merging the
 * resulting chaining values with compress_parents_parallel() until at most
 * two remain; the first two chaining values are then copied to `out`.
 */
INLINE void compress_subtree_to_parent_node(
    const uint8_t *input, size_t input_len, const uint32_t key[8],
    uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) {
#if defined(BLAKE3_TESTING)
  assert(input_len > BLAKE3_CHUNK_LEN);
#endif
  uint8_t cvs[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
  uint8_t merged[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
  size_t remaining = blake3_compress_subtree_wide(input, input_len, key,
                                                  chunk_counter, flags, cvs);
  for (; remaining > 2;) {
    remaining = compress_parents_parallel(cvs, remaining, key, flags, merged);
    memcpy(cvs, merged, remaining * BLAKE3_OUT_LEN);
  }
  memcpy(out, cvs, 2 * BLAKE3_OUT_LEN);
}
/* Common hasher initialisation: remember the key, start a fresh chunk state
 * with the given flags, and empty the chaining-value stack. */
INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8],
                             uint8_t flags) {
  self->cv_stack_len = 0;
  memcpy(self->key, key, BLAKE3_KEY_LEN);
  chunk_state_init(&self->chunk, key, flags);
}
/* Initialise a hasher with IV as the key and no flags. */
void blake3_hasher_init(blake3_hasher *self) {
  hasher_init_base(self, IV, 0);
}
/*
 * Initialise a hasher for key derivation. The context bytes are hashed with
 * a temporary hasher (IV key, DERIVE_KEY_CONTEXT flag); the first
 * BLAKE3_KEY_LEN bytes of that hash become the key of `self`, which is
 * initialised with the DERIVE_KEY_MATERIAL flag.
 */
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
                                       size_t context_len) {
  uint8_t derived_key[BLAKE3_KEY_LEN];
  uint32_t derived_key_words[8];
  blake3_hasher ctx_hasher;

  /* Stage 1: hash the context string. */
  hasher_init_base(&ctx_hasher, IV, DERIVE_KEY_CONTEXT);
  blake3_hasher_update(&ctx_hasher, context, context_len);
  blake3_hasher_finalize(&ctx_hasher, derived_key, BLAKE3_KEY_LEN);

  /* Stage 2: key the real hasher with the result. */
  load_key_words(derived_key, derived_key_words);
  hasher_init_base(self, derived_key_words, DERIVE_KEY_MATERIAL);
}
/* Key-derivation initialiser for a NUL-terminated context string; the
 * terminator is not part of the hashed context. */
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {
  const size_t context_len = strlen(context);
  blake3_hasher_init_derive_key_raw(self, context, context_len);
}
/*
 * Shrink the chaining-value stack until its length equals
 * popcnt(total_len): each step replaces the top two entries with their
 * parent chaining value.
 */
INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
  const size_t target_len = (size_t)popcnt(total_len);
  while (self->cv_stack_len > target_len) {
    /* The top two entries sit back to back and form one parent block. */
    uint8_t *top_pair =
        &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN];
    output_t parent = parent_output(top_pair, self->key, self->chunk.flags);
    output_chaining_value(&parent, top_pair);
    self->cv_stack_len -= 1;
  }
}
/* Merge the stack down for `chunk_counter`, then push `new_cv` on top. */
INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
                           uint64_t chunk_counter) {
  hasher_merge_cv_stack(self, chunk_counter);
  uint8_t *slot = &self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN];
  memcpy(slot, new_cv, BLAKE3_OUT_LEN);
  self->cv_stack_len += 1;
}
/*
 * Absorb `input_len` bytes of `input` into the hasher.
 *
 * Three phases:
 *   1. If the current chunk already holds data, fill it up first. When more
 *      input follows, the chunk is finished, its chaining value is pushed on
 *      the stack and the chunk state is reset for the next chunk.
 *   2. While more than one chunk of input remains, hash a subtree of the
 *      input in one go and push its chaining value(s).
 *   3. Feed whatever is left (at most one chunk) into the chunk state.
 */
void blake3_hasher_update(blake3_hasher *self, const void *input,
                          size_t input_len) {
  /* Nothing to do for empty input. */
  if (input_len == 0) {
    return;
  }
  const uint8_t *input_bytes = (const uint8_t *)input;
  /* Phase 1: top up a partially filled chunk. */
  if (chunk_state_len(&self->chunk) > 0) {
    size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&self->chunk);
    if (take > input_len) {
      take = input_len;
    }
    chunk_state_update(&self->chunk, input_bytes, take);
    input_bytes += take;
    input_len -= take;
    /* Only finish the chunk when more input follows; otherwise keep it open
     * and return. */
    if (input_len > 0) {
      output_t output = chunk_state_output(&self->chunk);
      uint8_t chunk_cv[32];
      output_chaining_value(&output, chunk_cv);
      hasher_push_cv(self, chunk_cv, self->chunk.chunk_counter);
      chunk_state_reset(&self->chunk, self->key, self->chunk.chunk_counter + 1);
    } else {
      return;
    }
  }
  /* Phase 2: consume subtrees while more than one chunk of input remains. */
  while (input_len > BLAKE3_CHUNK_LEN) {
    size_t subtree_len = round_down_to_power_of_2(input_len);
    /* Bytes hashed so far, derived from the chunk counter. */
    uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN;
    /* Halve the subtree until (subtree_len - 1) shares no set bit with
     * count_so_far. */
    while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {
      subtree_len /= 2;
    }
    uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN;
    if (subtree_len <= BLAKE3_CHUNK_LEN) {
      /* Single chunk: hash it with a temporary chunk state and push one
       * chaining value. */
      blake3_chunk_state chunk_state;
      chunk_state_init(&chunk_state, self->key, self->chunk.flags);
      chunk_state.chunk_counter = self->chunk.chunk_counter;
      chunk_state_update(&chunk_state, input_bytes, subtree_len);
      output_t output = chunk_state_output(&chunk_state);
      uint8_t cv[BLAKE3_OUT_LEN];
      output_chaining_value(&output, cv);
      hasher_push_cv(self, cv, chunk_state.chunk_counter);
    } else {
      /* Larger subtree: reduce it to two chaining values and push both, the
       * second with the chunk counter advanced by half the subtree. */
      uint8_t cv_pair[2 * BLAKE3_OUT_LEN];
      compress_subtree_to_parent_node(input_bytes, subtree_len, self->key,
                                      self->chunk.chunk_counter,
                                      self->chunk.flags, cv_pair);
      hasher_push_cv(self, cv_pair, self->chunk.chunk_counter);
      hasher_push_cv(self, &cv_pair[BLAKE3_OUT_LEN],
                     self->chunk.chunk_counter + (subtree_chunks / 2));
    }
    self->chunk.chunk_counter += subtree_chunks;
    input_bytes += subtree_len;
    input_len -= subtree_len;
  }
  /* Phase 3: buffer the remainder in the chunk state and merge the stack
   * for the current chunk counter. */
  if (input_len > 0) {
    chunk_state_update(&self->chunk, input_bytes, input_len);
    hasher_merge_cv_stack(self, self->chunk.chunk_counter);
  }
}
/* Write `out_len` bytes of hash output starting at output offset 0. */
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
                            size_t out_len) {
  const uint64_t from_start = 0;
  blake3_hasher_finalize_seek(self, from_start, out, out_len);
}
/*
 * Write `out_len` bytes of hash output starting at output offset `seek`.
 * The hasher is not modified.
 *
 * With an empty chaining-value stack the current chunk is the root.
 * Otherwise the starting node is either the current chunk (if it holds data)
 * or the parent of the top two stack entries, and it is folded with the
 * remaining stack entries from top to bottom before root bytes are emitted.
 */
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
                                 uint8_t *out, size_t out_len) {
  if (out_len == 0) {
    return;
  }

  if (self->cv_stack_len == 0) {
    output_t root = chunk_state_output(&self->chunk);
    output_root_bytes(&root, seek, out, out_len);
    return;
  }

  output_t node;
  size_t pending;
  if (chunk_state_len(&self->chunk) > 0) {
    pending = self->cv_stack_len;
    node = chunk_state_output(&self->chunk);
  } else {
    /* No buffered chunk data: start from the top two stack entries. */
    pending = self->cv_stack_len - 2;
    node = parent_output(&self->cv_stack[pending * 32], self->key,
                         self->chunk.flags);
  }

  while (pending > 0) {
    pending -= 1;
    /* Left half: stack entry; right half: chaining value of `node`. */
    uint8_t parent_block[BLAKE3_BLOCK_LEN];
    memcpy(parent_block, &self->cv_stack[pending * 32], 32);
    output_chaining_value(&node, &parent_block[32]);
    node = parent_output(parent_block, self->key, self->chunk.flags);
  }
  output_root_bytes(&node, seek, out, out_len);
}
#endif ///HWBLAKE3
/// LICENSE_END.15




/// LICENSE_START.16
/**
 * The Whirlpool hashing function.
 *
 * <P>
 * <b>References</b>
 *
 * <P>
 * The Whirlpool algorithm was developed by
 * <a href="mailto:pbarreto@scopus.com.br">Paulo S. L. M. Barreto</a> and
 * <a href="mailto:vincent.rijmen@cryptomathic.com">Vincent Rijmen</a>.
 */
/*
 * Integer types, masking/rotation/byte-order helper macros and the hashing
 * state structure for the Whirlpool code that follows.
 */

/* Fixed-width aliases: 16- and 32-bit types are chosen by the width of
 * unsigned int. */
typedef signed char s8;
typedef unsigned char u8;
#if UINT_MAX >= 4294967295UL
typedef signed short s16;
typedef signed int s32;
typedef unsigned short u16;
typedef unsigned int u32;
#define ONE32   0xffffffffU
#else
typedef signed int s16;
typedef signed long s32;
typedef unsigned int u16;
typedef unsigned long u32;
#define ONE32   0xffffffffUL
#endif
/* All-ones masks and truncation to 8/16/32 bits. */
#define ONE8    0xffU
#define ONE16   0xffffU
#define T8(x)   ((x) & ONE8)
#define T16(x)  ((x) & ONE16)
#define T32(x)  ((x) & ONE32)
/* 64-bit types and the LL() literal-suffix helper. */
#ifdef _MSC_VER
typedef unsigned __int64 u64;
typedef signed __int64 s64;
#define LL(v)   (v##i64)
#define ONE64   LL(0xffffffffffffffff)
#else
typedef unsigned long long u64;
typedef signed long long s64;
#define LL(v)   (v##ULL)
#define ONE64   LL(0xffffffffffffffff)
#endif
#define T64(x)  ((x) & ONE64)
/* Rotate a 64-bit value right by n bits. */
#define ROTR64(v, n)   (((v) >> (n)) | T64((v) << (64 - (n))))
/* Read a 32-bit value from four bytes at c (big- / little-endian). */
#define U8TO32_BIG(c)  (((u32)T8(*(c)) << 24) | ((u32)T8(*((c) + 1)) << 16) | ((u32)T8(*((c) + 2)) << 8) | ((u32)T8(*((c) + 3))))
/* The third term was missing its opening parenthesis, leaving the macro
 * unbalanced so that any use of it failed to compile. */
#define U8TO32_LITTLE(c)  (((u32)T8(*(c))) | ((u32)T8(*((c) + 1)) << 8) | ((u32)T8(*((c) + 2)) << 16) | ((u32)T8(*((c) + 3)) << 24))
/* Write the 32-bit value v to four bytes at c (big- / little-endian). */
#define U32TO8_BIG(c, v)    do { u32 x = (v); u8 *d = (c); d[0] = T8(x >> 24); d[1] = T8(x >> 16); d[2] = T8(x >> 8); d[3] = T8(x); } while (0)
#define U32TO8_LITTLE(c, v)    do { u32 x = (v); u8 *d = (c); d[0] = T8(x); d[1] = T8(x >> 8); d[2] = T8(x >> 16); d[3] = T8(x >> 24); } while (0)
/* Rotate a 32-bit value left by n bits. */
#define ROTL32(v, n)   (T32((v) << (n)) | ((v) >> (32 - (n))))
/* Sizes in bytes and bits: digest, input block, and bit-length counter. */
#define DIGESTBYTES 64
#define DIGESTBITS  (8*DIGESTBYTES)
#define WBLOCKBYTES 64
#define WBLOCKBITS  (8*WBLOCKBYTES)
#define LENGTHBYTES 32
#define LENGTHBITS  (8*LENGTHBYTES)
/* Hashing state. */
typedef struct NESSIEstruct {
	u8  bitLength[LENGTHBYTES];  /* LENGTHBYTES-byte counter (name suggests bits hashed so far) */
	u8  buffer[WBLOCKBYTES];     /* one input block's worth of buffered data */
	int bufferBits;              /* name suggests: number of bits currently buffered */
	int bufferPos;               /* name suggests: current position within buffer */
	u64 hash[DIGESTBYTES/8];     /* DIGESTBYTES bytes of hash state as 64-bit words */
} NESSIEstruct;
#define WHIRLPOOL_R 10
/*
 * C0: 256-entry table of 64-bit constants for the Whirlpool code in this
 * section. Its use is not visible in this part of the file; presumably it is
 * indexed by a byte value in the round function -- TODO confirm.
 * NOTE(review): the visible entries of the next table (C1) look like the
 * matching C0 entries rotated right by 8 bits.
 */
static const u64 C0[256] = {
    LL(0x18186018c07830d8), LL(0x23238c2305af4626), LL(0xc6c63fc67ef991b8), LL(0xe8e887e8136fcdfb),
    LL(0x878726874ca113cb), LL(0xb8b8dab8a9626d11), LL(0x0101040108050209), LL(0x4f4f214f426e9e0d),
    LL(0x3636d836adee6c9b), LL(0xa6a6a2a6590451ff), LL(0xd2d26fd2debdb90c), LL(0xf5f5f3f5fb06f70e),
    LL(0x7979f979ef80f296), LL(0x6f6fa16f5fcede30), LL(0x91917e91fcef3f6d), LL(0x52525552aa07a4f8),
    LL(0x60609d6027fdc047), LL(0xbcbccabc89766535), LL(0x9b9b569baccd2b37), LL(0x8e8e028e048c018a),
    LL(0xa3a3b6a371155bd2), LL(0x0c0c300c603c186c), LL(0x7b7bf17bff8af684), LL(0x3535d435b5e16a80),
    LL(0x1d1d741de8693af5), LL(0xe0e0a7e05347ddb3), LL(0xd7d77bd7f6acb321), LL(0xc2c22fc25eed999c),
    LL(0x2e2eb82e6d965c43), LL(0x4b4b314b627a9629), LL(0xfefedffea321e15d), LL(0x575741578216aed5),
    LL(0x15155415a8412abd), LL(0x7777c1779fb6eee8), LL(0x3737dc37a5eb6e92), LL(0xe5e5b3e57b56d79e),
    LL(0x9f9f469f8cd92313), LL(0xf0f0e7f0d317fd23), LL(0x4a4a354a6a7f9420), LL(0xdada4fda9e95a944),
    LL(0x58587d58fa25b0a2), LL(0xc9c903c906ca8fcf), LL(0x2929a429558d527c), LL(0x0a0a280a5022145a),
    LL(0xb1b1feb1e14f7f50), LL(0xa0a0baa0691a5dc9), LL(0x6b6bb16b7fdad614), LL(0x85852e855cab17d9),
    LL(0xbdbdcebd8173673c), LL(0x5d5d695dd234ba8f), LL(0x1010401080502090), LL(0xf4f4f7f4f303f507),
    LL(0xcbcb0bcb16c08bdd), LL(0x3e3ef83eedc67cd3), LL(0x0505140528110a2d), LL(0x676781671fe6ce78),
    LL(0xe4e4b7e47353d597), LL(0x27279c2725bb4e02), LL(0x4141194132588273), LL(0x8b8b168b2c9d0ba7),
    LL(0xa7a7a6a7510153f6), LL(0x7d7de97dcf94fab2), LL(0x95956e95dcfb3749), LL(0xd8d847d88e9fad56),
    LL(0xfbfbcbfb8b30eb70), LL(0xeeee9fee2371c1cd), LL(0x7c7ced7cc791f8bb), LL(0x6666856617e3cc71),
    LL(0xdddd53dda68ea77b), LL(0x17175c17b84b2eaf), LL(0x4747014702468e45), LL(0x9e9e429e84dc211a),
    LL(0xcaca0fca1ec589d4), LL(0x2d2db42d75995a58), LL(0xbfbfc6bf9179632e), LL(0x07071c07381b0e3f),
    LL(0xadad8ead012347ac), LL(0x5a5a755aea2fb4b0), LL(0x838336836cb51bef), LL(0x3333cc3385ff66b6),
    LL(0x636391633ff2c65c), LL(0x02020802100a0412), LL(0xaaaa92aa39384993), LL(0x7171d971afa8e2de),
    LL(0xc8c807c80ecf8dc6), LL(0x19196419c87d32d1), LL(0x494939497270923b), LL(0xd9d943d9869aaf5f),
    LL(0xf2f2eff2c31df931), LL(0xe3e3abe34b48dba8), LL(0x5b5b715be22ab6b9), LL(0x88881a8834920dbc),
    LL(0x9a9a529aa4c8293e), LL(0x262698262dbe4c0b), LL(0x3232c8328dfa64bf), LL(0xb0b0fab0e94a7d59),
    LL(0xe9e983e91b6acff2), LL(0x0f0f3c0f78331e77), LL(0xd5d573d5e6a6b733), LL(0x80803a8074ba1df4),
    LL(0xbebec2be997c6127), LL(0xcdcd13cd26de87eb), LL(0x3434d034bde46889), LL(0x48483d487a759032),
    LL(0xffffdbffab24e354), LL(0x7a7af57af78ff48d), LL(0x90907a90f4ea3d64), LL(0x5f5f615fc23ebe9d),
    LL(0x202080201da0403d), LL(0x6868bd6867d5d00f), LL(0x1a1a681ad07234ca), LL(0xaeae82ae192c41b7),
    LL(0xb4b4eab4c95e757d), LL(0x54544d549a19a8ce), LL(0x93937693ece53b7f), LL(0x222288220daa442f),
    LL(0x64648d6407e9c863), LL(0xf1f1e3f1db12ff2a), LL(0x7373d173bfa2e6cc), LL(0x12124812905a2482),
    LL(0x40401d403a5d807a), LL(0x0808200840281048), LL(0xc3c32bc356e89b95), LL(0xecec97ec337bc5df),
    LL(0xdbdb4bdb9690ab4d), LL(0xa1a1bea1611f5fc0), LL(0x8d8d0e8d1c830791), LL(0x3d3df43df5c97ac8),
    LL(0x97976697ccf1335b), LL(0x0000000000000000), LL(0xcfcf1bcf36d483f9), LL(0x2b2bac2b4587566e),
    LL(0x7676c57697b3ece1), LL(0x8282328264b019e6), LL(0xd6d67fd6fea9b128), LL(0x1b1b6c1bd87736c3),
    LL(0xb5b5eeb5c15b7774), LL(0xafaf86af112943be), LL(0x6a6ab56a77dfd41d), LL(0x50505d50ba0da0ea),
    LL(0x45450945124c8a57), LL(0xf3f3ebf3cb18fb38), LL(0x3030c0309df060ad), LL(0xefef9bef2b74c3c4),
    LL(0x3f3ffc3fe5c37eda), LL(0x55554955921caac7), LL(0xa2a2b2a2791059db), LL(0xeaea8fea0365c9e9),
    LL(0x656589650fecca6a), LL(0xbabad2bab9686903), LL(0x2f2fbc2f65935e4a), LL(0xc0c027c04ee79d8e),
    LL(0xdede5fdebe81a160), LL(0x1c1c701ce06c38fc), LL(0xfdfdd3fdbb2ee746), LL(0x4d4d294d52649a1f),
    LL(0x92927292e4e03976), LL(0x7575c9758fbceafa), LL(0x06061806301e0c36), LL(0x8a8a128a249809ae),
    LL(0xb2b2f2b2f940794b), LL(0xe6e6bfe66359d185), LL(0x0e0e380e70361c7e), LL(0x1f1f7c1ff8633ee7),
    LL(0x6262956237f7c455), LL(0xd4d477d4eea3b53a), LL(0xa8a89aa829324d81), LL(0x96966296c4f43152),
    LL(0xf9f9c3f99b3aef62), LL(0xc5c533c566f697a3), LL(0x2525942535b14a10), LL(0x59597959f220b2ab),
    LL(0x84842a8454ae15d0), LL(0x7272d572b7a7e4c5), LL(0x3939e439d5dd72ec), LL(0x4c4c2d4c5a619816),
    LL(0x5e5e655eca3bbc94), LL(0x7878fd78e785f09f), LL(0x3838e038ddd870e5), LL(0x8c8c0a8c14860598),
    LL(0xd1d163d1c6b2bf17), LL(0xa5a5aea5410b57e4), LL(0xe2e2afe2434dd9a1), LL(0x616199612ff8c24e),
    LL(0xb3b3f6b3f1457b42), LL(0x2121842115a54234), LL(0x9c9c4a9c94d62508), LL(0x1e1e781ef0663cee),
    LL(0x4343114322528661), LL(0xc7c73bc776fc93b1), LL(0xfcfcd7fcb32be54f), LL(0x0404100420140824),
    LL(0x51515951b208a2e3), LL(0x99995e99bcc72f25), LL(0x6d6da96d4fc4da22), LL(0x0d0d340d68391a65),
    LL(0xfafacffa8335e979), LL(0xdfdf5bdfb684a369), LL(0x7e7ee57ed79bfca9), LL(0x242490243db44819),
    LL(0x3b3bec3bc5d776fe), LL(0xabab96ab313d4b9a), LL(0xcece1fce3ed181f0), LL(0x1111441188552299),
    LL(0x8f8f068f0c890383), LL(0x4e4e254e4a6b9c04), LL(0xb7b7e6b7d1517366), LL(0xebeb8beb0b60cbe0),
    LL(0x3c3cf03cfdcc78c1), LL(0x81813e817cbf1ffd), LL(0x94946a94d4fe3540), LL(0xf7f7fbf7eb0cf31c),
    LL(0xb9b9deb9a1676f18), LL(0x13134c13985f268b), LL(0x2c2cb02c7d9c5851), LL(0xd3d36bd3d6b8bb05),
    LL(0xe7e7bbe76b5cd38c), LL(0x6e6ea56e57cbdc39), LL(0xc4c437c46ef395aa), LL(0x03030c03180f061b),
    LL(0x565645568a13acdc), LL(0x44440d441a49885e), LL(0x7f7fe17fdf9efea0), LL(0xa9a99ea921374f88),
    LL(0x2a2aa82a4d825467), LL(0xbbbbd6bbb16d6b0a), LL(0xc1c123c146e29f87), LL(0x53535153a202a6f1),
    LL(0xdcdc57dcae8ba572), LL(0x0b0b2c0b58271653), LL(0x9d9d4e9d9cd32701), LL(0x6c6cad6c47c1d82b),
    LL(0x3131c43195f562a4), LL(0x7474cd7487b9e8f3), LL(0xf6f6fff6e309f115), LL(0x464605460a438c4c),
    LL(0xacac8aac092645a5), LL(0x89891e893c970fb5), LL(0x14145014a04428b4), LL(0xe1e1a3e15b42dfba),
    LL(0x16165816b04e2ca6), LL(0x3a3ae83acdd274f7), LL(0x6969b9696fd0d206), LL(0x09092409482d1241),
    LL(0x7070dd70a7ade0d7), LL(0xb6b6e2b6d954716f), LL(0xd0d067d0ceb7bd1e), LL(0xeded93ed3b7ec7d6),
    LL(0xcccc17cc2edb85e2), LL(0x424215422a578468), LL(0x98985a98b4c22d2c), LL(0xa4a4aaa4490e55ed),
    LL(0x2828a0285d885075), LL(0x5c5c6d5cda31b886), LL(0xf8f8c7f8933fed6b), LL(0x8686228644a411c2),
};
/*
 * C1: read-only lookup table of 256 64-bit constants.
 *
 * Each entry is the same-index entry of the table defined immediately before
 * this one (presumably C0) rotated right by 8 bits: the lowest byte of the
 * previous word becomes the highest byte here. For example the last entry of
 * the preceding table, 0x8686228644a411c2, appears here as 0xc28686228644a411.
 *
 * LL() is a macro defined elsewhere in the file; presumably it only attaches
 * the 64-bit integer-literal suffix -- TODO confirm.
 * NOTE(review): the values look like the circulant tables of the Whirlpool
 * reference implementation -- confirm against the code that consumes them.
 */
static const u64 C1[256] = {
    LL(0xd818186018c07830), LL(0x2623238c2305af46), LL(0xb8c6c63fc67ef991), LL(0xfbe8e887e8136fcd),
    LL(0xcb878726874ca113), LL(0x11b8b8dab8a9626d), LL(0x0901010401080502), LL(0x0d4f4f214f426e9e),
    LL(0x9b3636d836adee6c), LL(0xffa6a6a2a6590451), LL(0x0cd2d26fd2debdb9), LL(0x0ef5f5f3f5fb06f7),
    LL(0x967979f979ef80f2), LL(0x306f6fa16f5fcede), LL(0x6d91917e91fcef3f), LL(0xf852525552aa07a4),
    LL(0x4760609d6027fdc0), LL(0x35bcbccabc897665), LL(0x379b9b569baccd2b), LL(0x8a8e8e028e048c01),
    LL(0xd2a3a3b6a371155b), LL(0x6c0c0c300c603c18), LL(0x847b7bf17bff8af6), LL(0x803535d435b5e16a),
    LL(0xf51d1d741de8693a), LL(0xb3e0e0a7e05347dd), LL(0x21d7d77bd7f6acb3), LL(0x9cc2c22fc25eed99),
    LL(0x432e2eb82e6d965c), LL(0x294b4b314b627a96), LL(0x5dfefedffea321e1), LL(0xd5575741578216ae),
    LL(0xbd15155415a8412a), LL(0xe87777c1779fb6ee), LL(0x923737dc37a5eb6e), LL(0x9ee5e5b3e57b56d7),
    LL(0x139f9f469f8cd923), LL(0x23f0f0e7f0d317fd), LL(0x204a4a354a6a7f94), LL(0x44dada4fda9e95a9),
    LL(0xa258587d58fa25b0), LL(0xcfc9c903c906ca8f), LL(0x7c2929a429558d52), LL(0x5a0a0a280a502214),
    LL(0x50b1b1feb1e14f7f), LL(0xc9a0a0baa0691a5d), LL(0x146b6bb16b7fdad6), LL(0xd985852e855cab17),
    LL(0x3cbdbdcebd817367), LL(0x8f5d5d695dd234ba), LL(0x9010104010805020), LL(0x07f4f4f7f4f303f5),
    LL(0xddcbcb0bcb16c08b), LL(0xd33e3ef83eedc67c), LL(0x2d0505140528110a), LL(0x78676781671fe6ce),
    LL(0x97e4e4b7e47353d5), LL(0x0227279c2725bb4e), LL(0x7341411941325882), LL(0xa78b8b168b2c9d0b),
    LL(0xf6a7a7a6a7510153), LL(0xb27d7de97dcf94fa), LL(0x4995956e95dcfb37), LL(0x56d8d847d88e9fad),
    LL(0x70fbfbcbfb8b30eb), LL(0xcdeeee9fee2371c1), LL(0xbb7c7ced7cc791f8), LL(0x716666856617e3cc),
    LL(0x7bdddd53dda68ea7), LL(0xaf17175c17b84b2e), LL(0x454747014702468e), LL(0x1a9e9e429e84dc21),
    LL(0xd4caca0fca1ec589), LL(0x582d2db42d75995a), LL(0x2ebfbfc6bf917963), LL(0x3f07071c07381b0e),
    LL(0xacadad8ead012347), LL(0xb05a5a755aea2fb4), LL(0xef838336836cb51b), LL(0xb63333cc3385ff66),
    LL(0x5c636391633ff2c6), LL(0x1202020802100a04), LL(0x93aaaa92aa393849), LL(0xde7171d971afa8e2),
    LL(0xc6c8c807c80ecf8d), LL(0xd119196419c87d32), LL(0x3b49493949727092), LL(0x5fd9d943d9869aaf),
    LL(0x31f2f2eff2c31df9), LL(0xa8e3e3abe34b48db), LL(0xb95b5b715be22ab6), LL(0xbc88881a8834920d),
    LL(0x3e9a9a529aa4c829), LL(0x0b262698262dbe4c), LL(0xbf3232c8328dfa64), LL(0x59b0b0fab0e94a7d),
    LL(0xf2e9e983e91b6acf), LL(0x770f0f3c0f78331e), LL(0x33d5d573d5e6a6b7), LL(0xf480803a8074ba1d),
    LL(0x27bebec2be997c61), LL(0xebcdcd13cd26de87), LL(0x893434d034bde468), LL(0x3248483d487a7590),
    LL(0x54ffffdbffab24e3), LL(0x8d7a7af57af78ff4), LL(0x6490907a90f4ea3d), LL(0x9d5f5f615fc23ebe),
    LL(0x3d202080201da040), LL(0x0f6868bd6867d5d0), LL(0xca1a1a681ad07234), LL(0xb7aeae82ae192c41),
    LL(0x7db4b4eab4c95e75), LL(0xce54544d549a19a8), LL(0x7f93937693ece53b), LL(0x2f222288220daa44),
    LL(0x6364648d6407e9c8), LL(0x2af1f1e3f1db12ff), LL(0xcc7373d173bfa2e6), LL(0x8212124812905a24),
    LL(0x7a40401d403a5d80), LL(0x4808082008402810), LL(0x95c3c32bc356e89b), LL(0xdfecec97ec337bc5),
    LL(0x4ddbdb4bdb9690ab), LL(0xc0a1a1bea1611f5f), LL(0x918d8d0e8d1c8307), LL(0xc83d3df43df5c97a),
    LL(0x5b97976697ccf133), LL(0x0000000000000000), LL(0xf9cfcf1bcf36d483), LL(0x6e2b2bac2b458756),
    LL(0xe17676c57697b3ec), LL(0xe68282328264b019), LL(0x28d6d67fd6fea9b1), LL(0xc31b1b6c1bd87736),
    LL(0x74b5b5eeb5c15b77), LL(0xbeafaf86af112943), LL(0x1d6a6ab56a77dfd4), LL(0xea50505d50ba0da0),
    LL(0x5745450945124c8a), LL(0x38f3f3ebf3cb18fb), LL(0xad3030c0309df060), LL(0xc4efef9bef2b74c3),
    LL(0xda3f3ffc3fe5c37e), LL(0xc755554955921caa), LL(0xdba2a2b2a2791059), LL(0xe9eaea8fea0365c9),
    LL(0x6a656589650fecca), LL(0x03babad2bab96869), LL(0x4a2f2fbc2f65935e), LL(0x8ec0c027c04ee79d),
    LL(0x60dede5fdebe81a1), LL(0xfc1c1c701ce06c38), LL(0x46fdfdd3fdbb2ee7), LL(0x1f4d4d294d52649a),
    LL(0x7692927292e4e039), LL(0xfa7575c9758fbcea), LL(0x3606061806301e0c), LL(0xae8a8a128a249809),
    LL(0x4bb2b2f2b2f94079), LL(0x85e6e6bfe66359d1), LL(0x7e0e0e380e70361c), LL(0xe71f1f7c1ff8633e),
    LL(0x556262956237f7c4), LL(0x3ad4d477d4eea3b5), LL(0x81a8a89aa829324d), LL(0x5296966296c4f431),
    LL(0x62f9f9c3f99b3aef), LL(0xa3c5c533c566f697), LL(0x102525942535b14a), LL(0xab59597959f220b2),
    LL(0xd084842a8454ae15), LL(0xc57272d572b7a7e4), LL(0xec3939e439d5dd72), LL(0x164c4c2d4c5a6198),
    LL(0x945e5e655eca3bbc), LL(0x9f7878fd78e785f0), LL(0xe53838e038ddd870), LL(0x988c8c0a8c148605),
    LL(0x17d1d163d1c6b2bf), LL(0xe4a5a5aea5410b57), LL(0xa1e2e2afe2434dd9), LL(0x4e616199612ff8c2),
    LL(0x42b3b3f6b3f1457b), LL(0x342121842115a542), LL(0x089c9c4a9c94d625), LL(0xee1e1e781ef0663c),
    LL(0x6143431143225286), LL(0xb1c7c73bc776fc93), LL(0x4ffcfcd7fcb32be5), LL(0x2404041004201408),
    LL(0xe351515951b208a2), LL(0x2599995e99bcc72f), LL(0x226d6da96d4fc4da), LL(0x650d0d340d68391a),
    LL(0x79fafacffa8335e9), LL(0x69dfdf5bdfb684a3), LL(0xa97e7ee57ed79bfc), LL(0x19242490243db448),
    LL(0xfe3b3bec3bc5d776), LL(0x9aabab96ab313d4b), LL(0xf0cece1fce3ed181), LL(0x9911114411885522),
    LL(0x838f8f068f0c8903), LL(0x044e4e254e4a6b9c), LL(0x66b7b7e6b7d15173), LL(0xe0ebeb8beb0b60cb),
    LL(0xc13c3cf03cfdcc78), LL(0xfd81813e817cbf1f), LL(0x4094946a94d4fe35), LL(0x1cf7f7fbf7eb0cf3),
    LL(0x18b9b9deb9a1676f), LL(0x8b13134c13985f26), LL(0x512c2cb02c7d9c58), LL(0x05d3d36bd3d6b8bb),
    LL(0x8ce7e7bbe76b5cd3), LL(0x396e6ea56e57cbdc), LL(0xaac4c437c46ef395), LL(0x1b03030c03180f06),
    LL(0xdc565645568a13ac), LL(0x5e44440d441a4988), LL(0xa07f7fe17fdf9efe), LL(0x88a9a99ea921374f),
    LL(0x672a2aa82a4d8254), LL(0x0abbbbd6bbb16d6b), LL(0x87c1c123c146e29f), LL(0xf153535153a202a6),
    LL(0x72dcdc57dcae8ba5), LL(0x530b0b2c0b582716), LL(0x019d9d4e9d9cd327), LL(0x2b6c6cad6c47c1d8),
    LL(0xa43131c43195f562), LL(0xf37474cd7487b9e8), LL(0x15f6f6fff6e309f1), LL(0x4c464605460a438c),
    LL(0xa5acac8aac092645), LL(0xb589891e893c970f), LL(0xb414145014a04428), LL(0xbae1e1a3e15b42df),
    LL(0xa616165816b04e2c), LL(0xf73a3ae83acdd274), LL(0x066969b9696fd0d2), LL(0x4109092409482d12),
    LL(0xd77070dd70a7ade0), LL(0x6fb6b6e2b6d95471), LL(0x1ed0d067d0ceb7bd), LL(0xd6eded93ed3b7ec7),
    LL(0xe2cccc17cc2edb85), LL(0x68424215422a5784), LL(0x2c98985a98b4c22d), LL(0xeda4a4aaa4490e55),
    LL(0x752828a0285d8850), LL(0x865c5c6d5cda31b8), LL(0x6bf8f8c7f8933fed), LL(0xc28686228644a411),
};
/*
 * C2: read-only lookup table of 256 64-bit constants.
 *
 * Each entry is the same-index entry of C1 rotated right by 8 bits
 * (e.g. C1[0] = 0xd818186018c07830 -> C2[0] = 0x30d818186018c078).
 *
 * LL() is a macro defined elsewhere in the file; presumably it only attaches
 * the 64-bit integer-literal suffix -- TODO confirm.
 * NOTE(review): the values look like the circulant tables of the Whirlpool
 * reference implementation -- confirm against the code that consumes them.
 */
static const u64 C2[256] = {
    LL(0x30d818186018c078), LL(0x462623238c2305af), LL(0x91b8c6c63fc67ef9), LL(0xcdfbe8e887e8136f),
    LL(0x13cb878726874ca1), LL(0x6d11b8b8dab8a962), LL(0x0209010104010805), LL(0x9e0d4f4f214f426e),
    LL(0x6c9b3636d836adee), LL(0x51ffa6a6a2a65904), LL(0xb90cd2d26fd2debd), LL(0xf70ef5f5f3f5fb06),
    LL(0xf2967979f979ef80), LL(0xde306f6fa16f5fce), LL(0x3f6d91917e91fcef), LL(0xa4f852525552aa07),
    LL(0xc04760609d6027fd), LL(0x6535bcbccabc8976), LL(0x2b379b9b569baccd), LL(0x018a8e8e028e048c),
    LL(0x5bd2a3a3b6a37115), LL(0x186c0c0c300c603c), LL(0xf6847b7bf17bff8a), LL(0x6a803535d435b5e1),
    LL(0x3af51d1d741de869), LL(0xddb3e0e0a7e05347), LL(0xb321d7d77bd7f6ac), LL(0x999cc2c22fc25eed),
    LL(0x5c432e2eb82e6d96), LL(0x96294b4b314b627a), LL(0xe15dfefedffea321), LL(0xaed5575741578216),
    LL(0x2abd15155415a841), LL(0xeee87777c1779fb6), LL(0x6e923737dc37a5eb), LL(0xd79ee5e5b3e57b56),
    LL(0x23139f9f469f8cd9), LL(0xfd23f0f0e7f0d317), LL(0x94204a4a354a6a7f), LL(0xa944dada4fda9e95),
    LL(0xb0a258587d58fa25), LL(0x8fcfc9c903c906ca), LL(0x527c2929a429558d), LL(0x145a0a0a280a5022),
    LL(0x7f50b1b1feb1e14f), LL(0x5dc9a0a0baa0691a), LL(0xd6146b6bb16b7fda), LL(0x17d985852e855cab),
    LL(0x673cbdbdcebd8173), LL(0xba8f5d5d695dd234), LL(0x2090101040108050), LL(0xf507f4f4f7f4f303),
    LL(0x8bddcbcb0bcb16c0), LL(0x7cd33e3ef83eedc6), LL(0x0a2d050514052811), LL(0xce78676781671fe6),
    LL(0xd597e4e4b7e47353), LL(0x4e0227279c2725bb), LL(0x8273414119413258), LL(0x0ba78b8b168b2c9d),
    LL(0x53f6a7a7a6a75101), LL(0xfab27d7de97dcf94), LL(0x374995956e95dcfb), LL(0xad56d8d847d88e9f),
    LL(0xeb70fbfbcbfb8b30), LL(0xc1cdeeee9fee2371), LL(0xf8bb7c7ced7cc791), LL(0xcc716666856617e3),
    LL(0xa77bdddd53dda68e), LL(0x2eaf17175c17b84b), LL(0x8e45474701470246), LL(0x211a9e9e429e84dc),
    LL(0x89d4caca0fca1ec5), LL(0x5a582d2db42d7599), LL(0x632ebfbfc6bf9179), LL(0x0e3f07071c07381b),
    LL(0x47acadad8ead0123), LL(0xb4b05a5a755aea2f), LL(0x1bef838336836cb5), LL(0x66b63333cc3385ff),
    LL(0xc65c636391633ff2), LL(0x041202020802100a), LL(0x4993aaaa92aa3938), LL(0xe2de7171d971afa8),
    LL(0x8dc6c8c807c80ecf), LL(0x32d119196419c87d), LL(0x923b494939497270), LL(0xaf5fd9d943d9869a),
    LL(0xf931f2f2eff2c31d), LL(0xdba8e3e3abe34b48), LL(0xb6b95b5b715be22a), LL(0x0dbc88881a883492),
    LL(0x293e9a9a529aa4c8), LL(0x4c0b262698262dbe), LL(0x64bf3232c8328dfa), LL(0x7d59b0b0fab0e94a),
    LL(0xcff2e9e983e91b6a), LL(0x1e770f0f3c0f7833), LL(0xb733d5d573d5e6a6), LL(0x1df480803a8074ba),
    LL(0x6127bebec2be997c), LL(0x87ebcdcd13cd26de), LL(0x68893434d034bde4), LL(0x903248483d487a75),
    LL(0xe354ffffdbffab24), LL(0xf48d7a7af57af78f), LL(0x3d6490907a90f4ea), LL(0xbe9d5f5f615fc23e),
    LL(0x403d202080201da0), LL(0xd00f6868bd6867d5), LL(0x34ca1a1a681ad072), LL(0x41b7aeae82ae192c),
    LL(0x757db4b4eab4c95e), LL(0xa8ce54544d549a19), LL(0x3b7f93937693ece5), LL(0x442f222288220daa),
    LL(0xc86364648d6407e9), LL(0xff2af1f1e3f1db12), LL(0xe6cc7373d173bfa2), LL(0x248212124812905a),
    LL(0x807a40401d403a5d), LL(0x1048080820084028), LL(0x9b95c3c32bc356e8), LL(0xc5dfecec97ec337b),
    LL(0xab4ddbdb4bdb9690), LL(0x5fc0a1a1bea1611f), LL(0x07918d8d0e8d1c83), LL(0x7ac83d3df43df5c9),
    LL(0x335b97976697ccf1), LL(0x0000000000000000), LL(0x83f9cfcf1bcf36d4), LL(0x566e2b2bac2b4587),
    LL(0xece17676c57697b3), LL(0x19e68282328264b0), LL(0xb128d6d67fd6fea9), LL(0x36c31b1b6c1bd877),
    LL(0x7774b5b5eeb5c15b), LL(0x43beafaf86af1129), LL(0xd41d6a6ab56a77df), LL(0xa0ea50505d50ba0d),
    LL(0x8a5745450945124c), LL(0xfb38f3f3ebf3cb18), LL(0x60ad3030c0309df0), LL(0xc3c4efef9bef2b74),
    LL(0x7eda3f3ffc3fe5c3), LL(0xaac755554955921c), LL(0x59dba2a2b2a27910), LL(0xc9e9eaea8fea0365),
    LL(0xca6a656589650fec), LL(0x6903babad2bab968), LL(0x5e4a2f2fbc2f6593), LL(0x9d8ec0c027c04ee7),
    LL(0xa160dede5fdebe81), LL(0x38fc1c1c701ce06c), LL(0xe746fdfdd3fdbb2e), LL(0x9a1f4d4d294d5264),
    LL(0x397692927292e4e0), LL(0xeafa7575c9758fbc), LL(0x0c3606061806301e), LL(0x09ae8a8a128a2498),
    LL(0x794bb2b2f2b2f940), LL(0xd185e6e6bfe66359), LL(0x1c7e0e0e380e7036), LL(0x3ee71f1f7c1ff863),
    LL(0xc4556262956237f7), LL(0xb53ad4d477d4eea3), LL(0x4d81a8a89aa82932), LL(0x315296966296c4f4),
    LL(0xef62f9f9c3f99b3a), LL(0x97a3c5c533c566f6), LL(0x4a102525942535b1), LL(0xb2ab59597959f220),
    LL(0x15d084842a8454ae), LL(0xe4c57272d572b7a7), LL(0x72ec3939e439d5dd), LL(0x98164c4c2d4c5a61),
    LL(0xbc945e5e655eca3b), LL(0xf09f7878fd78e785), LL(0x70e53838e038ddd8), LL(0x05988c8c0a8c1486),
    LL(0xbf17d1d163d1c6b2), LL(0x57e4a5a5aea5410b), LL(0xd9a1e2e2afe2434d), LL(0xc24e616199612ff8),
    LL(0x7b42b3b3f6b3f145), LL(0x42342121842115a5), LL(0x25089c9c4a9c94d6), LL(0x3cee1e1e781ef066),
    LL(0x8661434311432252), LL(0x93b1c7c73bc776fc), LL(0xe54ffcfcd7fcb32b), LL(0x0824040410042014),
    LL(0xa2e351515951b208), LL(0x2f2599995e99bcc7), LL(0xda226d6da96d4fc4), LL(0x1a650d0d340d6839),
    LL(0xe979fafacffa8335), LL(0xa369dfdf5bdfb684), LL(0xfca97e7ee57ed79b), LL(0x4819242490243db4),
    LL(0x76fe3b3bec3bc5d7), LL(0x4b9aabab96ab313d), LL(0x81f0cece1fce3ed1), LL(0x2299111144118855),
    LL(0x03838f8f068f0c89), LL(0x9c044e4e254e4a6b), LL(0x7366b7b7e6b7d151), LL(0xcbe0ebeb8beb0b60),
    LL(0x78c13c3cf03cfdcc), LL(0x1ffd81813e817cbf), LL(0x354094946a94d4fe), LL(0xf31cf7f7fbf7eb0c),
    LL(0x6f18b9b9deb9a167), LL(0x268b13134c13985f), LL(0x58512c2cb02c7d9c), LL(0xbb05d3d36bd3d6b8),
    LL(0xd38ce7e7bbe76b5c), LL(0xdc396e6ea56e57cb), LL(0x95aac4c437c46ef3), LL(0x061b03030c03180f),
    LL(0xacdc565645568a13), LL(0x885e44440d441a49), LL(0xfea07f7fe17fdf9e), LL(0x4f88a9a99ea92137),
    LL(0x54672a2aa82a4d82), LL(0x6b0abbbbd6bbb16d), LL(0x9f87c1c123c146e2), LL(0xa6f153535153a202),
    LL(0xa572dcdc57dcae8b), LL(0x16530b0b2c0b5827), LL(0x27019d9d4e9d9cd3), LL(0xd82b6c6cad6c47c1),
    LL(0x62a43131c43195f5), LL(0xe8f37474cd7487b9), LL(0xf115f6f6fff6e309), LL(0x8c4c464605460a43),
    LL(0x45a5acac8aac0926), LL(0x0fb589891e893c97), LL(0x28b414145014a044), LL(0xdfbae1e1a3e15b42),
    LL(0x2ca616165816b04e), LL(0x74f73a3ae83acdd2), LL(0xd2066969b9696fd0), LL(0x124109092409482d),
    LL(0xe0d77070dd70a7ad), LL(0x716fb6b6e2b6d954), LL(0xbd1ed0d067d0ceb7), LL(0xc7d6eded93ed3b7e),
    LL(0x85e2cccc17cc2edb), LL(0x8468424215422a57), LL(0x2d2c98985a98b4c2), LL(0x55eda4a4aaa4490e),
    LL(0x50752828a0285d88), LL(0xb8865c5c6d5cda31), LL(0xed6bf8f8c7f8933f), LL(0x11c28686228644a4),
};
/*
 * C3: read-only lookup table of 256 64-bit constants.
 *
 * Each entry is the same-index entry of C2 rotated right by 8 bits
 * (e.g. C2[0] = 0x30d818186018c078 -> C3[0] = 0x7830d818186018c0).
 *
 * LL() is a macro defined elsewhere in the file; presumably it only attaches
 * the 64-bit integer-literal suffix -- TODO confirm.
 * NOTE(review): the values look like the circulant tables of the Whirlpool
 * reference implementation -- confirm against the code that consumes them.
 */
static const u64 C3[256] = {
    LL(0x7830d818186018c0), LL(0xaf462623238c2305), LL(0xf991b8c6c63fc67e), LL(0x6fcdfbe8e887e813),
    LL(0xa113cb878726874c), LL(0x626d11b8b8dab8a9), LL(0x0502090101040108), LL(0x6e9e0d4f4f214f42),
    LL(0xee6c9b3636d836ad), LL(0x0451ffa6a6a2a659), LL(0xbdb90cd2d26fd2de), LL(0x06f70ef5f5f3f5fb),
    LL(0x80f2967979f979ef), LL(0xcede306f6fa16f5f), LL(0xef3f6d91917e91fc), LL(0x07a4f852525552aa),
    LL(0xfdc04760609d6027), LL(0x766535bcbccabc89), LL(0xcd2b379b9b569bac), LL(0x8c018a8e8e028e04),
    LL(0x155bd2a3a3b6a371), LL(0x3c186c0c0c300c60), LL(0x8af6847b7bf17bff), LL(0xe16a803535d435b5),
    LL(0x693af51d1d741de8), LL(0x47ddb3e0e0a7e053), LL(0xacb321d7d77bd7f6), LL(0xed999cc2c22fc25e),
    LL(0x965c432e2eb82e6d), LL(0x7a96294b4b314b62), LL(0x21e15dfefedffea3), LL(0x16aed55757415782),
    LL(0x412abd15155415a8), LL(0xb6eee87777c1779f), LL(0xeb6e923737dc37a5), LL(0x56d79ee5e5b3e57b),
    LL(0xd923139f9f469f8c), LL(0x17fd23f0f0e7f0d3), LL(0x7f94204a4a354a6a), LL(0x95a944dada4fda9e),
    LL(0x25b0a258587d58fa), LL(0xca8fcfc9c903c906), LL(0x8d527c2929a42955), LL(0x22145a0a0a280a50),
    LL(0x4f7f50b1b1feb1e1), LL(0x1a5dc9a0a0baa069), LL(0xdad6146b6bb16b7f), LL(0xab17d985852e855c),
    LL(0x73673cbdbdcebd81), LL(0x34ba8f5d5d695dd2), LL(0x5020901010401080), LL(0x03f507f4f4f7f4f3),
    LL(0xc08bddcbcb0bcb16), LL(0xc67cd33e3ef83eed), LL(0x110a2d0505140528), LL(0xe6ce78676781671f),
    LL(0x53d597e4e4b7e473), LL(0xbb4e0227279c2725), LL(0x5882734141194132), LL(0x9d0ba78b8b168b2c),
    LL(0x0153f6a7a7a6a751), LL(0x94fab27d7de97dcf), LL(0xfb374995956e95dc), LL(0x9fad56d8d847d88e),
    LL(0x30eb70fbfbcbfb8b), LL(0x71c1cdeeee9fee23), LL(0x91f8bb7c7ced7cc7), LL(0xe3cc716666856617),
    LL(0x8ea77bdddd53dda6), LL(0x4b2eaf17175c17b8), LL(0x468e454747014702), LL(0xdc211a9e9e429e84),
    LL(0xc589d4caca0fca1e), LL(0x995a582d2db42d75), LL(0x79632ebfbfc6bf91), LL(0x1b0e3f07071c0738),
    LL(0x2347acadad8ead01), LL(0x2fb4b05a5a755aea), LL(0xb51bef838336836c), LL(0xff66b63333cc3385),
    LL(0xf2c65c636391633f), LL(0x0a04120202080210), LL(0x384993aaaa92aa39), LL(0xa8e2de7171d971af),
    LL(0xcf8dc6c8c807c80e), LL(0x7d32d119196419c8), LL(0x70923b4949394972), LL(0x9aaf5fd9d943d986),
    LL(0x1df931f2f2eff2c3), LL(0x48dba8e3e3abe34b), LL(0x2ab6b95b5b715be2), LL(0x920dbc88881a8834),
    LL(0xc8293e9a9a529aa4), LL(0xbe4c0b262698262d), LL(0xfa64bf3232c8328d), LL(0x4a7d59b0b0fab0e9),
    LL(0x6acff2e9e983e91b), LL(0x331e770f0f3c0f78), LL(0xa6b733d5d573d5e6), LL(0xba1df480803a8074),
    LL(0x7c6127bebec2be99), LL(0xde87ebcdcd13cd26), LL(0xe468893434d034bd), LL(0x75903248483d487a),
    LL(0x24e354ffffdbffab), LL(0x8ff48d7a7af57af7), LL(0xea3d6490907a90f4), LL(0x3ebe9d5f5f615fc2),
    LL(0xa0403d202080201d), LL(0xd5d00f6868bd6867), LL(0x7234ca1a1a681ad0), LL(0x2c41b7aeae82ae19),
    LL(0x5e757db4b4eab4c9), LL(0x19a8ce54544d549a), LL(0xe53b7f93937693ec), LL(0xaa442f222288220d),
    LL(0xe9c86364648d6407), LL(0x12ff2af1f1e3f1db), LL(0xa2e6cc7373d173bf), LL(0x5a24821212481290),
    LL(0x5d807a40401d403a), LL(0x2810480808200840), LL(0xe89b95c3c32bc356), LL(0x7bc5dfecec97ec33),
    LL(0x90ab4ddbdb4bdb96), LL(0x1f5fc0a1a1bea161), LL(0x8307918d8d0e8d1c), LL(0xc97ac83d3df43df5),
    LL(0xf1335b97976697cc), LL(0x0000000000000000), LL(0xd483f9cfcf1bcf36), LL(0x87566e2b2bac2b45),
    LL(0xb3ece17676c57697), LL(0xb019e68282328264), LL(0xa9b128d6d67fd6fe), LL(0x7736c31b1b6c1bd8),
    LL(0x5b7774b5b5eeb5c1), LL(0x2943beafaf86af11), LL(0xdfd41d6a6ab56a77), LL(0x0da0ea50505d50ba),
    LL(0x4c8a574545094512), LL(0x18fb38f3f3ebf3cb), LL(0xf060ad3030c0309d), LL(0x74c3c4efef9bef2b),
    LL(0xc37eda3f3ffc3fe5), LL(0x1caac75555495592), LL(0x1059dba2a2b2a279), LL(0x65c9e9eaea8fea03),
    LL(0xecca6a656589650f), LL(0x686903babad2bab9), LL(0x935e4a2f2fbc2f65), LL(0xe79d8ec0c027c04e),
    LL(0x81a160dede5fdebe), LL(0x6c38fc1c1c701ce0), LL(0x2ee746fdfdd3fdbb), LL(0x649a1f4d4d294d52),
    LL(0xe0397692927292e4), LL(0xbceafa7575c9758f), LL(0x1e0c360606180630), LL(0x9809ae8a8a128a24),
    LL(0x40794bb2b2f2b2f9), LL(0x59d185e6e6bfe663), LL(0x361c7e0e0e380e70), LL(0x633ee71f1f7c1ff8),
    LL(0xf7c4556262956237), LL(0xa3b53ad4d477d4ee), LL(0x324d81a8a89aa829), LL(0xf4315296966296c4),
    LL(0x3aef62f9f9c3f99b), LL(0xf697a3c5c533c566), LL(0xb14a102525942535), LL(0x20b2ab59597959f2),
    LL(0xae15d084842a8454), LL(0xa7e4c57272d572b7), LL(0xdd72ec3939e439d5), LL(0x6198164c4c2d4c5a),
    LL(0x3bbc945e5e655eca), LL(0x85f09f7878fd78e7), LL(0xd870e53838e038dd), LL(0x8605988c8c0a8c14),
    LL(0xb2bf17d1d163d1c6), LL(0x0b57e4a5a5aea541), LL(0x4dd9a1e2e2afe243), LL(0xf8c24e616199612f),
    LL(0x457b42b3b3f6b3f1), LL(0xa542342121842115), LL(0xd625089c9c4a9c94), LL(0x663cee1e1e781ef0),
    LL(0x5286614343114322), LL(0xfc93b1c7c73bc776), LL(0x2be54ffcfcd7fcb3), LL(0x1408240404100420),
    LL(0x08a2e351515951b2), LL(0xc72f2599995e99bc), LL(0xc4da226d6da96d4f), LL(0x391a650d0d340d68),
    LL(0x35e979fafacffa83), LL(0x84a369dfdf5bdfb6), LL(0x9bfca97e7ee57ed7), LL(0xb44819242490243d),
    LL(0xd776fe3b3bec3bc5), LL(0x3d4b9aabab96ab31), LL(0xd181f0cece1fce3e), LL(0x5522991111441188),
    LL(0x8903838f8f068f0c), LL(0x6b9c044e4e254e4a), LL(0x517366b7b7e6b7d1), LL(0x60cbe0ebeb8beb0b),
    LL(0xcc78c13c3cf03cfd), LL(0xbf1ffd81813e817c), LL(0xfe354094946a94d4), LL(0x0cf31cf7f7fbf7eb),
    LL(0x676f18b9b9deb9a1), LL(0x5f268b13134c1398), LL(0x9c58512c2cb02c7d), LL(0xb8bb05d3d36bd3d6),
    LL(0x5cd38ce7e7bbe76b), LL(0xcbdc396e6ea56e57), LL(0xf395aac4c437c46e), LL(0x0f061b03030c0318),
    LL(0x13acdc565645568a), LL(0x49885e44440d441a), LL(0x9efea07f7fe17fdf), LL(0x374f88a9a99ea921),
    LL(0x8254672a2aa82a4d), LL(0x6d6b0abbbbd6bbb1), LL(0xe29f87c1c123c146), LL(0x02a6f153535153a2),
    LL(0x8ba572dcdc57dcae), LL(0x2716530b0b2c0b58), LL(0xd327019d9d4e9d9c), LL(0xc1d82b6c6cad6c47),
    LL(0xf562a43131c43195), LL(0xb9e8f37474cd7487), LL(0x09f115f6f6fff6e3), LL(0x438c4c464605460a),
    LL(0x2645a5acac8aac09), LL(0x970fb589891e893c), LL(0x4428b414145014a0), LL(0x42dfbae1e1a3e15b),
    LL(0x4e2ca616165816b0), LL(0xd274f73a3ae83acd), LL(0xd0d2066969b9696f), LL(0x2d12410909240948),
    LL(0xade0d77070dd70a7), LL(0x54716fb6b6e2b6d9), LL(0xb7bd1ed0d067d0ce), LL(0x7ec7d6eded93ed3b),
    LL(0xdb85e2cccc17cc2e), LL(0x578468424215422a), LL(0xc22d2c98985a98b4), LL(0x0e55eda4a4aaa449),
    LL(0x8850752828a0285d), LL(0x31b8865c5c6d5cda), LL(0x3fed6bf8f8c7f893), LL(0xa411c28686228644),
};
/*
 * C4: read-only lookup table of 256 64-bit constants.
 *
 * Each entry is the same-index entry of C3 rotated right by 8 bits
 * (e.g. C3[0] = 0x7830d818186018c0 -> C4[0] = 0xc07830d818186018).
 *
 * LL() is a macro defined elsewhere in the file; presumably it only attaches
 * the 64-bit integer-literal suffix -- TODO confirm.
 * NOTE(review): the values look like the circulant tables of the Whirlpool
 * reference implementation -- confirm against the code that consumes them.
 */
static const u64 C4[256] = {
    LL(0xc07830d818186018), LL(0x05af462623238c23), LL(0x7ef991b8c6c63fc6), LL(0x136fcdfbe8e887e8),
    LL(0x4ca113cb87872687), LL(0xa9626d11b8b8dab8), LL(0x0805020901010401), LL(0x426e9e0d4f4f214f),
    LL(0xadee6c9b3636d836), LL(0x590451ffa6a6a2a6), LL(0xdebdb90cd2d26fd2), LL(0xfb06f70ef5f5f3f5),
    LL(0xef80f2967979f979), LL(0x5fcede306f6fa16f), LL(0xfcef3f6d91917e91), LL(0xaa07a4f852525552),
    LL(0x27fdc04760609d60), LL(0x89766535bcbccabc), LL(0xaccd2b379b9b569b), LL(0x048c018a8e8e028e),
    LL(0x71155bd2a3a3b6a3), LL(0x603c186c0c0c300c), LL(0xff8af6847b7bf17b), LL(0xb5e16a803535d435),
    LL(0xe8693af51d1d741d), LL(0x5347ddb3e0e0a7e0), LL(0xf6acb321d7d77bd7), LL(0x5eed999cc2c22fc2),
    LL(0x6d965c432e2eb82e), LL(0x627a96294b4b314b), LL(0xa321e15dfefedffe), LL(0x8216aed557574157),
    LL(0xa8412abd15155415), LL(0x9fb6eee87777c177), LL(0xa5eb6e923737dc37), LL(0x7b56d79ee5e5b3e5),
    LL(0x8cd923139f9f469f), LL(0xd317fd23f0f0e7f0), LL(0x6a7f94204a4a354a), LL(0x9e95a944dada4fda),
    LL(0xfa25b0a258587d58), LL(0x06ca8fcfc9c903c9), LL(0x558d527c2929a429), LL(0x5022145a0a0a280a),
    LL(0xe14f7f50b1b1feb1), LL(0x691a5dc9a0a0baa0), LL(0x7fdad6146b6bb16b), LL(0x5cab17d985852e85),
    LL(0x8173673cbdbdcebd), LL(0xd234ba8f5d5d695d), LL(0x8050209010104010), LL(0xf303f507f4f4f7f4),
    LL(0x16c08bddcbcb0bcb), LL(0xedc67cd33e3ef83e), LL(0x28110a2d05051405), LL(0x1fe6ce7867678167),
    LL(0x7353d597e4e4b7e4), LL(0x25bb4e0227279c27), LL(0x3258827341411941), LL(0x2c9d0ba78b8b168b),
    LL(0x510153f6a7a7a6a7), LL(0xcf94fab27d7de97d), LL(0xdcfb374995956e95), LL(0x8e9fad56d8d847d8),
    LL(0x8b30eb70fbfbcbfb), LL(0x2371c1cdeeee9fee), LL(0xc791f8bb7c7ced7c), LL(0x17e3cc7166668566),
    LL(0xa68ea77bdddd53dd), LL(0xb84b2eaf17175c17), LL(0x02468e4547470147), LL(0x84dc211a9e9e429e),
    LL(0x1ec589d4caca0fca), LL(0x75995a582d2db42d), LL(0x9179632ebfbfc6bf), LL(0x381b0e3f07071c07),
    LL(0x012347acadad8ead), LL(0xea2fb4b05a5a755a), LL(0x6cb51bef83833683), LL(0x85ff66b63333cc33),
    LL(0x3ff2c65c63639163), LL(0x100a041202020802), LL(0x39384993aaaa92aa), LL(0xafa8e2de7171d971),
    LL(0x0ecf8dc6c8c807c8), LL(0xc87d32d119196419), LL(0x7270923b49493949), LL(0x869aaf5fd9d943d9),
    LL(0xc31df931f2f2eff2), LL(0x4b48dba8e3e3abe3), LL(0xe22ab6b95b5b715b), LL(0x34920dbc88881a88),
    LL(0xa4c8293e9a9a529a), LL(0x2dbe4c0b26269826), LL(0x8dfa64bf3232c832), LL(0xe94a7d59b0b0fab0),
    LL(0x1b6acff2e9e983e9), LL(0x78331e770f0f3c0f), LL(0xe6a6b733d5d573d5), LL(0x74ba1df480803a80),
    LL(0x997c6127bebec2be), LL(0x26de87ebcdcd13cd), LL(0xbde468893434d034), LL(0x7a75903248483d48),
    LL(0xab24e354ffffdbff), LL(0xf78ff48d7a7af57a), LL(0xf4ea3d6490907a90), LL(0xc23ebe9d5f5f615f),
    LL(0x1da0403d20208020), LL(0x67d5d00f6868bd68), LL(0xd07234ca1a1a681a), LL(0x192c41b7aeae82ae),
    LL(0xc95e757db4b4eab4), LL(0x9a19a8ce54544d54), LL(0xece53b7f93937693), LL(0x0daa442f22228822),
    LL(0x07e9c86364648d64), LL(0xdb12ff2af1f1e3f1), LL(0xbfa2e6cc7373d173), LL(0x905a248212124812),
    LL(0x3a5d807a40401d40), LL(0x4028104808082008), LL(0x56e89b95c3c32bc3), LL(0x337bc5dfecec97ec),
    LL(0x9690ab4ddbdb4bdb), LL(0x611f5fc0a1a1bea1), LL(0x1c8307918d8d0e8d), LL(0xf5c97ac83d3df43d),
    LL(0xccf1335b97976697), LL(0x0000000000000000), LL(0x36d483f9cfcf1bcf), LL(0x4587566e2b2bac2b),
    LL(0x97b3ece17676c576), LL(0x64b019e682823282), LL(0xfea9b128d6d67fd6), LL(0xd87736c31b1b6c1b),
    LL(0xc15b7774b5b5eeb5), LL(0x112943beafaf86af), LL(0x77dfd41d6a6ab56a), LL(0xba0da0ea50505d50),
    LL(0x124c8a5745450945), LL(0xcb18fb38f3f3ebf3), LL(0x9df060ad3030c030), LL(0x2b74c3c4efef9bef),
    LL(0xe5c37eda3f3ffc3f), LL(0x921caac755554955), LL(0x791059dba2a2b2a2), LL(0x0365c9e9eaea8fea),
    LL(0x0fecca6a65658965), LL(0xb9686903babad2ba), LL(0x65935e4a2f2fbc2f), LL(0x4ee79d8ec0c027c0),
    LL(0xbe81a160dede5fde), LL(0xe06c38fc1c1c701c), LL(0xbb2ee746fdfdd3fd), LL(0x52649a1f4d4d294d),
    LL(0xe4e0397692927292), LL(0x8fbceafa7575c975), LL(0x301e0c3606061806), LL(0x249809ae8a8a128a),
    LL(0xf940794bb2b2f2b2), LL(0x6359d185e6e6bfe6), LL(0x70361c7e0e0e380e), LL(0xf8633ee71f1f7c1f),
    LL(0x37f7c45562629562), LL(0xeea3b53ad4d477d4), LL(0x29324d81a8a89aa8), LL(0xc4f4315296966296),
    LL(0x9b3aef62f9f9c3f9), LL(0x66f697a3c5c533c5), LL(0x35b14a1025259425), LL(0xf220b2ab59597959),
    LL(0x54ae15d084842a84), LL(0xb7a7e4c57272d572), LL(0xd5dd72ec3939e439), LL(0x5a6198164c4c2d4c),
    LL(0xca3bbc945e5e655e), LL(0xe785f09f7878fd78), LL(0xddd870e53838e038), LL(0x148605988c8c0a8c),
    LL(0xc6b2bf17d1d163d1), LL(0x410b57e4a5a5aea5), LL(0x434dd9a1e2e2afe2), LL(0x2ff8c24e61619961),
    LL(0xf1457b42b3b3f6b3), LL(0x15a5423421218421), LL(0x94d625089c9c4a9c), LL(0xf0663cee1e1e781e),
    LL(0x2252866143431143), LL(0x76fc93b1c7c73bc7), LL(0xb32be54ffcfcd7fc), LL(0x2014082404041004),
    LL(0xb208a2e351515951), LL(0xbcc72f2599995e99), LL(0x4fc4da226d6da96d), LL(0x68391a650d0d340d),
    LL(0x8335e979fafacffa), LL(0xb684a369dfdf5bdf), LL(0xd79bfca97e7ee57e), LL(0x3db4481924249024),
    LL(0xc5d776fe3b3bec3b), LL(0x313d4b9aabab96ab), LL(0x3ed181f0cece1fce), LL(0x8855229911114411),
    LL(0x0c8903838f8f068f), LL(0x4a6b9c044e4e254e), LL(0xd1517366b7b7e6b7), LL(0x0b60cbe0ebeb8beb),
    LL(0xfdcc78c13c3cf03c), LL(0x7cbf1ffd81813e81), LL(0xd4fe354094946a94), LL(0xeb0cf31cf7f7fbf7),
    LL(0xa1676f18b9b9deb9), LL(0x985f268b13134c13), LL(0x7d9c58512c2cb02c), LL(0xd6b8bb05d3d36bd3),
    LL(0x6b5cd38ce7e7bbe7), LL(0x57cbdc396e6ea56e), LL(0x6ef395aac4c437c4), LL(0x180f061b03030c03),
    LL(0x8a13acdc56564556), LL(0x1a49885e44440d44), LL(0xdf9efea07f7fe17f), LL(0x21374f88a9a99ea9),
    LL(0x4d8254672a2aa82a), LL(0xb16d6b0abbbbd6bb), LL(0x46e29f87c1c123c1), LL(0xa202a6f153535153),
    LL(0xae8ba572dcdc57dc), LL(0x582716530b0b2c0b), LL(0x9cd327019d9d4e9d), LL(0x47c1d82b6c6cad6c),
    LL(0x95f562a43131c431), LL(0x87b9e8f37474cd74), LL(0xe309f115f6f6fff6), LL(0x0a438c4c46460546),
    LL(0x092645a5acac8aac), LL(0x3c970fb589891e89), LL(0xa04428b414145014), LL(0x5b42dfbae1e1a3e1),
    LL(0xb04e2ca616165816), LL(0xcdd274f73a3ae83a), LL(0x6fd0d2066969b969), LL(0x482d124109092409),
    LL(0xa7ade0d77070dd70), LL(0xd954716fb6b6e2b6), LL(0xceb7bd1ed0d067d0), LL(0x3b7ec7d6eded93ed),
    LL(0x2edb85e2cccc17cc), LL(0x2a57846842421542), LL(0xb4c22d2c98985a98), LL(0x490e55eda4a4aaa4),
    LL(0x5d8850752828a028), LL(0xda31b8865c5c6d5c), LL(0x933fed6bf8f8c7f8), LL(0x44a411c286862286),
};
/*
 * C5: read-only lookup table of 256 64-bit constants.
 *
 * Each entry is the same-index entry of C4 rotated right by 8 bits
 * (e.g. C4[0] = 0xc07830d818186018 -> C5[0] = 0x18c07830d8181860).
 *
 * LL() is a macro defined elsewhere in the file; presumably it only attaches
 * the 64-bit integer-literal suffix -- TODO confirm.
 * NOTE(review): the values look like the circulant tables of the Whirlpool
 * reference implementation -- confirm against the code that consumes them.
 */
static const u64 C5[256] = {
    LL(0x18c07830d8181860), LL(0x2305af462623238c), LL(0xc67ef991b8c6c63f), LL(0xe8136fcdfbe8e887),
    LL(0x874ca113cb878726), LL(0xb8a9626d11b8b8da), LL(0x0108050209010104), LL(0x4f426e9e0d4f4f21),
    LL(0x36adee6c9b3636d8), LL(0xa6590451ffa6a6a2), LL(0xd2debdb90cd2d26f), LL(0xf5fb06f70ef5f5f3),
    LL(0x79ef80f2967979f9), LL(0x6f5fcede306f6fa1), LL(0x91fcef3f6d91917e), LL(0x52aa07a4f8525255),
    LL(0x6027fdc04760609d), LL(0xbc89766535bcbcca), LL(0x9baccd2b379b9b56), LL(0x8e048c018a8e8e02),
    LL(0xa371155bd2a3a3b6), LL(0x0c603c186c0c0c30), LL(0x7bff8af6847b7bf1), LL(0x35b5e16a803535d4),
    LL(0x1de8693af51d1d74), LL(0xe05347ddb3e0e0a7), LL(0xd7f6acb321d7d77b), LL(0xc25eed999cc2c22f),
    LL(0x2e6d965c432e2eb8), LL(0x4b627a96294b4b31), LL(0xfea321e15dfefedf), LL(0x578216aed5575741),
    LL(0x15a8412abd151554), LL(0x779fb6eee87777c1), LL(0x37a5eb6e923737dc), LL(0xe57b56d79ee5e5b3),
    LL(0x9f8cd923139f9f46), LL(0xf0d317fd23f0f0e7), LL(0x4a6a7f94204a4a35), LL(0xda9e95a944dada4f),
    LL(0x58fa25b0a258587d), LL(0xc906ca8fcfc9c903), LL(0x29558d527c2929a4), LL(0x0a5022145a0a0a28),
    LL(0xb1e14f7f50b1b1fe), LL(0xa0691a5dc9a0a0ba), LL(0x6b7fdad6146b6bb1), LL(0x855cab17d985852e),
    LL(0xbd8173673cbdbdce), LL(0x5dd234ba8f5d5d69), LL(0x1080502090101040), LL(0xf4f303f507f4f4f7),
    LL(0xcb16c08bddcbcb0b), LL(0x3eedc67cd33e3ef8), LL(0x0528110a2d050514), LL(0x671fe6ce78676781),
    LL(0xe47353d597e4e4b7), LL(0x2725bb4e0227279c), LL(0x4132588273414119), LL(0x8b2c9d0ba78b8b16),
    LL(0xa7510153f6a7a7a6), LL(0x7dcf94fab27d7de9), LL(0x95dcfb374995956e), LL(0xd88e9fad56d8d847),
    LL(0xfb8b30eb70fbfbcb), LL(0xee2371c1cdeeee9f), LL(0x7cc791f8bb7c7ced), LL(0x6617e3cc71666685),
    LL(0xdda68ea77bdddd53), LL(0x17b84b2eaf17175c), LL(0x4702468e45474701), LL(0x9e84dc211a9e9e42),
    LL(0xca1ec589d4caca0f), LL(0x2d75995a582d2db4), LL(0xbf9179632ebfbfc6), LL(0x07381b0e3f07071c),
    LL(0xad012347acadad8e), LL(0x5aea2fb4b05a5a75), LL(0x836cb51bef838336), LL(0x3385ff66b63333cc),
    LL(0x633ff2c65c636391), LL(0x02100a0412020208), LL(0xaa39384993aaaa92), LL(0x71afa8e2de7171d9),
    LL(0xc80ecf8dc6c8c807), LL(0x19c87d32d1191964), LL(0x497270923b494939), LL(0xd9869aaf5fd9d943),
    LL(0xf2c31df931f2f2ef), LL(0xe34b48dba8e3e3ab), LL(0x5be22ab6b95b5b71), LL(0x8834920dbc88881a),
    LL(0x9aa4c8293e9a9a52), LL(0x262dbe4c0b262698), LL(0x328dfa64bf3232c8), LL(0xb0e94a7d59b0b0fa),
    LL(0xe91b6acff2e9e983), LL(0x0f78331e770f0f3c), LL(0xd5e6a6b733d5d573), LL(0x8074ba1df480803a),
    LL(0xbe997c6127bebec2), LL(0xcd26de87ebcdcd13), LL(0x34bde468893434d0), LL(0x487a75903248483d),
    LL(0xffab24e354ffffdb), LL(0x7af78ff48d7a7af5), LL(0x90f4ea3d6490907a), LL(0x5fc23ebe9d5f5f61),
    LL(0x201da0403d202080), LL(0x6867d5d00f6868bd), LL(0x1ad07234ca1a1a68), LL(0xae192c41b7aeae82),
    LL(0xb4c95e757db4b4ea), LL(0x549a19a8ce54544d), LL(0x93ece53b7f939376), LL(0x220daa442f222288),
    LL(0x6407e9c86364648d), LL(0xf1db12ff2af1f1e3), LL(0x73bfa2e6cc7373d1), LL(0x12905a2482121248),
    LL(0x403a5d807a40401d), LL(0x0840281048080820), LL(0xc356e89b95c3c32b), LL(0xec337bc5dfecec97),
    LL(0xdb9690ab4ddbdb4b), LL(0xa1611f5fc0a1a1be), LL(0x8d1c8307918d8d0e), LL(0x3df5c97ac83d3df4),
    LL(0x97ccf1335b979766), LL(0x0000000000000000), LL(0xcf36d483f9cfcf1b), LL(0x2b4587566e2b2bac),
    LL(0x7697b3ece17676c5), LL(0x8264b019e6828232), LL(0xd6fea9b128d6d67f), LL(0x1bd87736c31b1b6c),
    LL(0xb5c15b7774b5b5ee), LL(0xaf112943beafaf86), LL(0x6a77dfd41d6a6ab5), LL(0x50ba0da0ea50505d),
    LL(0x45124c8a57454509), LL(0xf3cb18fb38f3f3eb), LL(0x309df060ad3030c0), LL(0xef2b74c3c4efef9b),
    LL(0x3fe5c37eda3f3ffc), LL(0x55921caac7555549), LL(0xa2791059dba2a2b2), LL(0xea0365c9e9eaea8f),
    LL(0x650fecca6a656589), LL(0xbab9686903babad2), LL(0x2f65935e4a2f2fbc), LL(0xc04ee79d8ec0c027),
    LL(0xdebe81a160dede5f), LL(0x1ce06c38fc1c1c70), LL(0xfdbb2ee746fdfdd3), LL(0x4d52649a1f4d4d29),
    LL(0x92e4e03976929272), LL(0x758fbceafa7575c9), LL(0x06301e0c36060618), LL(0x8a249809ae8a8a12),
    LL(0xb2f940794bb2b2f2), LL(0xe66359d185e6e6bf), LL(0x0e70361c7e0e0e38), LL(0x1ff8633ee71f1f7c),
    LL(0x6237f7c455626295), LL(0xd4eea3b53ad4d477), LL(0xa829324d81a8a89a), LL(0x96c4f43152969662),
    LL(0xf99b3aef62f9f9c3), LL(0xc566f697a3c5c533), LL(0x2535b14a10252594), LL(0x59f220b2ab595979),
    LL(0x8454ae15d084842a), LL(0x72b7a7e4c57272d5), LL(0x39d5dd72ec3939e4), LL(0x4c5a6198164c4c2d),
    LL(0x5eca3bbc945e5e65), LL(0x78e785f09f7878fd), LL(0x38ddd870e53838e0), LL(0x8c148605988c8c0a),
    LL(0xd1c6b2bf17d1d163), LL(0xa5410b57e4a5a5ae), LL(0xe2434dd9a1e2e2af), LL(0x612ff8c24e616199),
    LL(0xb3f1457b42b3b3f6), LL(0x2115a54234212184), LL(0x9c94d625089c9c4a), LL(0x1ef0663cee1e1e78),
    LL(0x4322528661434311), LL(0xc776fc93b1c7c73b), LL(0xfcb32be54ffcfcd7), LL(0x0420140824040410),
    LL(0x51b208a2e3515159), LL(0x99bcc72f2599995e), LL(0x6d4fc4da226d6da9), LL(0x0d68391a650d0d34),
    LL(0xfa8335e979fafacf), LL(0xdfb684a369dfdf5b), LL(0x7ed79bfca97e7ee5), LL(0x243db44819242490),
    LL(0x3bc5d776fe3b3bec), LL(0xab313d4b9aabab96), LL(0xce3ed181f0cece1f), LL(0x1188552299111144),
    LL(0x8f0c8903838f8f06), LL(0x4e4a6b9c044e4e25), LL(0xb7d1517366b7b7e6), LL(0xeb0b60cbe0ebeb8b),
    LL(0x3cfdcc78c13c3cf0), LL(0x817cbf1ffd81813e), LL(0x94d4fe354094946a), LL(0xf7eb0cf31cf7f7fb),
    LL(0xb9a1676f18b9b9de), LL(0x13985f268b13134c), LL(0x2c7d9c58512c2cb0), LL(0xd3d6b8bb05d3d36b),
    LL(0xe76b5cd38ce7e7bb), LL(0x6e57cbdc396e6ea5), LL(0xc46ef395aac4c437), LL(0x03180f061b03030c),
    LL(0x568a13acdc565645), LL(0x441a49885e44440d), LL(0x7fdf9efea07f7fe1), LL(0xa921374f88a9a99e),
    LL(0x2a4d8254672a2aa8), LL(0xbbb16d6b0abbbbd6), LL(0xc146e29f87c1c123), LL(0x53a202a6f1535351),
    LL(0xdcae8ba572dcdc57), LL(0x0b582716530b0b2c), LL(0x9d9cd327019d9d4e), LL(0x6c47c1d82b6c6cad),
    LL(0x3195f562a43131c4), LL(0x7487b9e8f37474cd), LL(0xf6e309f115f6f6ff), LL(0x460a438c4c464605),
    LL(0xac092645a5acac8a), LL(0x893c970fb589891e), LL(0x14a04428b4141450), LL(0xe15b42dfbae1e1a3),
    LL(0x16b04e2ca6161658), LL(0x3acdd274f73a3ae8), LL(0x696fd0d2066969b9), LL(0x09482d1241090924),
    LL(0x70a7ade0d77070dd), LL(0xb6d954716fb6b6e2), LL(0xd0ceb7bd1ed0d067), LL(0xed3b7ec7d6eded93),
    LL(0xcc2edb85e2cccc17), LL(0x422a578468424215), LL(0x98b4c22d2c98985a), LL(0xa4490e55eda4a4aa),
    LL(0x285d8850752828a0), LL(0x5cda31b8865c5c6d), LL(0xf8933fed6bf8f8c7), LL(0x8644a411c2868622),
};
/*
 * C6: 256-entry lookup table of 64-bit constants.
 * processBuffer() indexes it with bits 8..15 of a key/state word
 * ((w >> 8) & 0xff) and XORs the selected entry into the round output.
 */
static const u64 C6[256] = {
    LL(0x6018c07830d81818), LL(0x8c2305af46262323), LL(0x3fc67ef991b8c6c6), LL(0x87e8136fcdfbe8e8),
    LL(0x26874ca113cb8787), LL(0xdab8a9626d11b8b8), LL(0x0401080502090101), LL(0x214f426e9e0d4f4f),
    LL(0xd836adee6c9b3636), LL(0xa2a6590451ffa6a6), LL(0x6fd2debdb90cd2d2), LL(0xf3f5fb06f70ef5f5),
    LL(0xf979ef80f2967979), LL(0xa16f5fcede306f6f), LL(0x7e91fcef3f6d9191), LL(0x5552aa07a4f85252),
    LL(0x9d6027fdc0476060), LL(0xcabc89766535bcbc), LL(0x569baccd2b379b9b), LL(0x028e048c018a8e8e),
    LL(0xb6a371155bd2a3a3), LL(0x300c603c186c0c0c), LL(0xf17bff8af6847b7b), LL(0xd435b5e16a803535),
    LL(0x741de8693af51d1d), LL(0xa7e05347ddb3e0e0), LL(0x7bd7f6acb321d7d7), LL(0x2fc25eed999cc2c2),
    LL(0xb82e6d965c432e2e), LL(0x314b627a96294b4b), LL(0xdffea321e15dfefe), LL(0x41578216aed55757),
    LL(0x5415a8412abd1515), LL(0xc1779fb6eee87777), LL(0xdc37a5eb6e923737), LL(0xb3e57b56d79ee5e5),
    LL(0x469f8cd923139f9f), LL(0xe7f0d317fd23f0f0), LL(0x354a6a7f94204a4a), LL(0x4fda9e95a944dada),
    LL(0x7d58fa25b0a25858), LL(0x03c906ca8fcfc9c9), LL(0xa429558d527c2929), LL(0x280a5022145a0a0a),
    LL(0xfeb1e14f7f50b1b1), LL(0xbaa0691a5dc9a0a0), LL(0xb16b7fdad6146b6b), LL(0x2e855cab17d98585),
    LL(0xcebd8173673cbdbd), LL(0x695dd234ba8f5d5d), LL(0x4010805020901010), LL(0xf7f4f303f507f4f4),
    LL(0x0bcb16c08bddcbcb), LL(0xf83eedc67cd33e3e), LL(0x140528110a2d0505), LL(0x81671fe6ce786767),
    LL(0xb7e47353d597e4e4), LL(0x9c2725bb4e022727), LL(0x1941325882734141), LL(0x168b2c9d0ba78b8b),
    LL(0xa6a7510153f6a7a7), LL(0xe97dcf94fab27d7d), LL(0x6e95dcfb37499595), LL(0x47d88e9fad56d8d8),
    LL(0xcbfb8b30eb70fbfb), LL(0x9fee2371c1cdeeee), LL(0xed7cc791f8bb7c7c), LL(0x856617e3cc716666),
    LL(0x53dda68ea77bdddd), LL(0x5c17b84b2eaf1717), LL(0x014702468e454747), LL(0x429e84dc211a9e9e),
    LL(0x0fca1ec589d4caca), LL(0xb42d75995a582d2d), LL(0xc6bf9179632ebfbf), LL(0x1c07381b0e3f0707),
    LL(0x8ead012347acadad), LL(0x755aea2fb4b05a5a), LL(0x36836cb51bef8383), LL(0xcc3385ff66b63333),
    LL(0x91633ff2c65c6363), LL(0x0802100a04120202), LL(0x92aa39384993aaaa), LL(0xd971afa8e2de7171),
    LL(0x07c80ecf8dc6c8c8), LL(0x6419c87d32d11919), LL(0x39497270923b4949), LL(0x43d9869aaf5fd9d9),
    LL(0xeff2c31df931f2f2), LL(0xabe34b48dba8e3e3), LL(0x715be22ab6b95b5b), LL(0x1a8834920dbc8888),
    LL(0x529aa4c8293e9a9a), LL(0x98262dbe4c0b2626), LL(0xc8328dfa64bf3232), LL(0xfab0e94a7d59b0b0),
    LL(0x83e91b6acff2e9e9), LL(0x3c0f78331e770f0f), LL(0x73d5e6a6b733d5d5), LL(0x3a8074ba1df48080),
    LL(0xc2be997c6127bebe), LL(0x13cd26de87ebcdcd), LL(0xd034bde468893434), LL(0x3d487a7590324848),
    LL(0xdbffab24e354ffff), LL(0xf57af78ff48d7a7a), LL(0x7a90f4ea3d649090), LL(0x615fc23ebe9d5f5f),
    LL(0x80201da0403d2020), LL(0xbd6867d5d00f6868), LL(0x681ad07234ca1a1a), LL(0x82ae192c41b7aeae),
    LL(0xeab4c95e757db4b4), LL(0x4d549a19a8ce5454), LL(0x7693ece53b7f9393), LL(0x88220daa442f2222),
    LL(0x8d6407e9c8636464), LL(0xe3f1db12ff2af1f1), LL(0xd173bfa2e6cc7373), LL(0x4812905a24821212),
    LL(0x1d403a5d807a4040), LL(0x2008402810480808), LL(0x2bc356e89b95c3c3), LL(0x97ec337bc5dfecec),
    LL(0x4bdb9690ab4ddbdb), LL(0xbea1611f5fc0a1a1), LL(0x0e8d1c8307918d8d), LL(0xf43df5c97ac83d3d),
    LL(0x6697ccf1335b9797), LL(0x0000000000000000), LL(0x1bcf36d483f9cfcf), LL(0xac2b4587566e2b2b),
    LL(0xc57697b3ece17676), LL(0x328264b019e68282), LL(0x7fd6fea9b128d6d6), LL(0x6c1bd87736c31b1b),
    LL(0xeeb5c15b7774b5b5), LL(0x86af112943beafaf), LL(0xb56a77dfd41d6a6a), LL(0x5d50ba0da0ea5050),
    LL(0x0945124c8a574545), LL(0xebf3cb18fb38f3f3), LL(0xc0309df060ad3030), LL(0x9bef2b74c3c4efef),
    LL(0xfc3fe5c37eda3f3f), LL(0x4955921caac75555), LL(0xb2a2791059dba2a2), LL(0x8fea0365c9e9eaea),
    LL(0x89650fecca6a6565), LL(0xd2bab9686903baba), LL(0xbc2f65935e4a2f2f), LL(0x27c04ee79d8ec0c0),
    LL(0x5fdebe81a160dede), LL(0x701ce06c38fc1c1c), LL(0xd3fdbb2ee746fdfd), LL(0x294d52649a1f4d4d),
    LL(0x7292e4e039769292), LL(0xc9758fbceafa7575), LL(0x1806301e0c360606), LL(0x128a249809ae8a8a),
    LL(0xf2b2f940794bb2b2), LL(0xbfe66359d185e6e6), LL(0x380e70361c7e0e0e), LL(0x7c1ff8633ee71f1f),
    LL(0x956237f7c4556262), LL(0x77d4eea3b53ad4d4), LL(0x9aa829324d81a8a8), LL(0x6296c4f431529696),
    LL(0xc3f99b3aef62f9f9), LL(0x33c566f697a3c5c5), LL(0x942535b14a102525), LL(0x7959f220b2ab5959),
    LL(0x2a8454ae15d08484), LL(0xd572b7a7e4c57272), LL(0xe439d5dd72ec3939), LL(0x2d4c5a6198164c4c),
    LL(0x655eca3bbc945e5e), LL(0xfd78e785f09f7878), LL(0xe038ddd870e53838), LL(0x0a8c148605988c8c),
    LL(0x63d1c6b2bf17d1d1), LL(0xaea5410b57e4a5a5), LL(0xafe2434dd9a1e2e2), LL(0x99612ff8c24e6161),
    LL(0xf6b3f1457b42b3b3), LL(0x842115a542342121), LL(0x4a9c94d625089c9c), LL(0x781ef0663cee1e1e),
    LL(0x1143225286614343), LL(0x3bc776fc93b1c7c7), LL(0xd7fcb32be54ffcfc), LL(0x1004201408240404),
    LL(0x5951b208a2e35151), LL(0x5e99bcc72f259999), LL(0xa96d4fc4da226d6d), LL(0x340d68391a650d0d),
    LL(0xcffa8335e979fafa), LL(0x5bdfb684a369dfdf), LL(0xe57ed79bfca97e7e), LL(0x90243db448192424),
    LL(0xec3bc5d776fe3b3b), LL(0x96ab313d4b9aabab), LL(0x1fce3ed181f0cece), LL(0x4411885522991111),
    LL(0x068f0c8903838f8f), LL(0x254e4a6b9c044e4e), LL(0xe6b7d1517366b7b7), LL(0x8beb0b60cbe0ebeb),
    LL(0xf03cfdcc78c13c3c), LL(0x3e817cbf1ffd8181), LL(0x6a94d4fe35409494), LL(0xfbf7eb0cf31cf7f7),
    LL(0xdeb9a1676f18b9b9), LL(0x4c13985f268b1313), LL(0xb02c7d9c58512c2c), LL(0x6bd3d6b8bb05d3d3),
    LL(0xbbe76b5cd38ce7e7), LL(0xa56e57cbdc396e6e), LL(0x37c46ef395aac4c4), LL(0x0c03180f061b0303),
    LL(0x45568a13acdc5656), LL(0x0d441a49885e4444), LL(0xe17fdf9efea07f7f), LL(0x9ea921374f88a9a9),
    LL(0xa82a4d8254672a2a), LL(0xd6bbb16d6b0abbbb), LL(0x23c146e29f87c1c1), LL(0x5153a202a6f15353),
    LL(0x57dcae8ba572dcdc), LL(0x2c0b582716530b0b), LL(0x4e9d9cd327019d9d), LL(0xad6c47c1d82b6c6c),
    LL(0xc43195f562a43131), LL(0xcd7487b9e8f37474), LL(0xfff6e309f115f6f6), LL(0x05460a438c4c4646),
    LL(0x8aac092645a5acac), LL(0x1e893c970fb58989), LL(0x5014a04428b41414), LL(0xa3e15b42dfbae1e1),
    LL(0x5816b04e2ca61616), LL(0xe83acdd274f73a3a), LL(0xb9696fd0d2066969), LL(0x2409482d12410909),
    LL(0xdd70a7ade0d77070), LL(0xe2b6d954716fb6b6), LL(0x67d0ceb7bd1ed0d0), LL(0x93ed3b7ec7d6eded),
    LL(0x17cc2edb85e2cccc), LL(0x15422a5784684242), LL(0x5a98b4c22d2c9898), LL(0xaaa4490e55eda4a4),
    LL(0xa0285d8850752828), LL(0x6d5cda31b8865c5c), LL(0xc7f8933fed6bf8f8), LL(0x228644a411c28686),
};
/*
 * C7: 256-entry lookup table of 64-bit constants.
 * processBuffer() indexes it with the lowest byte of a key/state word
 * (w & 0xff) and XORs the selected entry into the round output.
 * NOTE(review): the entries appear to be the C6 entries rotated right by
 * 8 bits (checked on the first rows only).
 */
static const u64 C7[256] = {
    LL(0x186018c07830d818), LL(0x238c2305af462623), LL(0xc63fc67ef991b8c6), LL(0xe887e8136fcdfbe8),
    LL(0x8726874ca113cb87), LL(0xb8dab8a9626d11b8), LL(0x0104010805020901), LL(0x4f214f426e9e0d4f),
    LL(0x36d836adee6c9b36), LL(0xa6a2a6590451ffa6), LL(0xd26fd2debdb90cd2), LL(0xf5f3f5fb06f70ef5),
    LL(0x79f979ef80f29679), LL(0x6fa16f5fcede306f), LL(0x917e91fcef3f6d91), LL(0x525552aa07a4f852),
    LL(0x609d6027fdc04760), LL(0xbccabc89766535bc), LL(0x9b569baccd2b379b), LL(0x8e028e048c018a8e),
    LL(0xa3b6a371155bd2a3), LL(0x0c300c603c186c0c), LL(0x7bf17bff8af6847b), LL(0x35d435b5e16a8035),
    LL(0x1d741de8693af51d), LL(0xe0a7e05347ddb3e0), LL(0xd77bd7f6acb321d7), LL(0xc22fc25eed999cc2),
    LL(0x2eb82e6d965c432e), LL(0x4b314b627a96294b), LL(0xfedffea321e15dfe), LL(0x5741578216aed557),
    LL(0x155415a8412abd15), LL(0x77c1779fb6eee877), LL(0x37dc37a5eb6e9237), LL(0xe5b3e57b56d79ee5),
    LL(0x9f469f8cd923139f), LL(0xf0e7f0d317fd23f0), LL(0x4a354a6a7f94204a), LL(0xda4fda9e95a944da),
    LL(0x587d58fa25b0a258), LL(0xc903c906ca8fcfc9), LL(0x29a429558d527c29), LL(0x0a280a5022145a0a),
    LL(0xb1feb1e14f7f50b1), LL(0xa0baa0691a5dc9a0), LL(0x6bb16b7fdad6146b), LL(0x852e855cab17d985),
    LL(0xbdcebd8173673cbd), LL(0x5d695dd234ba8f5d), LL(0x1040108050209010), LL(0xf4f7f4f303f507f4),
    LL(0xcb0bcb16c08bddcb), LL(0x3ef83eedc67cd33e), LL(0x05140528110a2d05), LL(0x6781671fe6ce7867),
    LL(0xe4b7e47353d597e4), LL(0x279c2725bb4e0227), LL(0x4119413258827341), LL(0x8b168b2c9d0ba78b),
    LL(0xa7a6a7510153f6a7), LL(0x7de97dcf94fab27d), LL(0x956e95dcfb374995), LL(0xd847d88e9fad56d8),
    LL(0xfbcbfb8b30eb70fb), LL(0xee9fee2371c1cdee), LL(0x7ced7cc791f8bb7c), LL(0x66856617e3cc7166),
    LL(0xdd53dda68ea77bdd), LL(0x175c17b84b2eaf17), LL(0x47014702468e4547), LL(0x9e429e84dc211a9e),
    LL(0xca0fca1ec589d4ca), LL(0x2db42d75995a582d), LL(0xbfc6bf9179632ebf), LL(0x071c07381b0e3f07),
    LL(0xad8ead012347acad), LL(0x5a755aea2fb4b05a), LL(0x8336836cb51bef83), LL(0x33cc3385ff66b633),
    LL(0x6391633ff2c65c63), LL(0x020802100a041202), LL(0xaa92aa39384993aa), LL(0x71d971afa8e2de71),
    LL(0xc807c80ecf8dc6c8), LL(0x196419c87d32d119), LL(0x4939497270923b49), LL(0xd943d9869aaf5fd9),
    LL(0xf2eff2c31df931f2), LL(0xe3abe34b48dba8e3), LL(0x5b715be22ab6b95b), LL(0x881a8834920dbc88),
    LL(0x9a529aa4c8293e9a), LL(0x2698262dbe4c0b26), LL(0x32c8328dfa64bf32), LL(0xb0fab0e94a7d59b0),
    LL(0xe983e91b6acff2e9), LL(0x0f3c0f78331e770f), LL(0xd573d5e6a6b733d5), LL(0x803a8074ba1df480),
    LL(0xbec2be997c6127be), LL(0xcd13cd26de87ebcd), LL(0x34d034bde4688934), LL(0x483d487a75903248),
    LL(0xffdbffab24e354ff), LL(0x7af57af78ff48d7a), LL(0x907a90f4ea3d6490), LL(0x5f615fc23ebe9d5f),
    LL(0x2080201da0403d20), LL(0x68bd6867d5d00f68), LL(0x1a681ad07234ca1a), LL(0xae82ae192c41b7ae),
    LL(0xb4eab4c95e757db4), LL(0x544d549a19a8ce54), LL(0x937693ece53b7f93), LL(0x2288220daa442f22),
    LL(0x648d6407e9c86364), LL(0xf1e3f1db12ff2af1), LL(0x73d173bfa2e6cc73), LL(0x124812905a248212),
    LL(0x401d403a5d807a40), LL(0x0820084028104808), LL(0xc32bc356e89b95c3), LL(0xec97ec337bc5dfec),
    LL(0xdb4bdb9690ab4ddb), LL(0xa1bea1611f5fc0a1), LL(0x8d0e8d1c8307918d), LL(0x3df43df5c97ac83d),
    LL(0x976697ccf1335b97), LL(0x0000000000000000), LL(0xcf1bcf36d483f9cf), LL(0x2bac2b4587566e2b),
    LL(0x76c57697b3ece176), LL(0x82328264b019e682), LL(0xd67fd6fea9b128d6), LL(0x1b6c1bd87736c31b),
    LL(0xb5eeb5c15b7774b5), LL(0xaf86af112943beaf), LL(0x6ab56a77dfd41d6a), LL(0x505d50ba0da0ea50),
    LL(0x450945124c8a5745), LL(0xf3ebf3cb18fb38f3), LL(0x30c0309df060ad30), LL(0xef9bef2b74c3c4ef),
    LL(0x3ffc3fe5c37eda3f), LL(0x554955921caac755), LL(0xa2b2a2791059dba2), LL(0xea8fea0365c9e9ea),
    LL(0x6589650fecca6a65), LL(0xbad2bab9686903ba), LL(0x2fbc2f65935e4a2f), LL(0xc027c04ee79d8ec0),
    LL(0xde5fdebe81a160de), LL(0x1c701ce06c38fc1c), LL(0xfdd3fdbb2ee746fd), LL(0x4d294d52649a1f4d),
    LL(0x927292e4e0397692), LL(0x75c9758fbceafa75), LL(0x061806301e0c3606), LL(0x8a128a249809ae8a),
    LL(0xb2f2b2f940794bb2), LL(0xe6bfe66359d185e6), LL(0x0e380e70361c7e0e), LL(0x1f7c1ff8633ee71f),
    LL(0x62956237f7c45562), LL(0xd477d4eea3b53ad4), LL(0xa89aa829324d81a8), LL(0x966296c4f4315296),
    LL(0xf9c3f99b3aef62f9), LL(0xc533c566f697a3c5), LL(0x25942535b14a1025), LL(0x597959f220b2ab59),
    LL(0x842a8454ae15d084), LL(0x72d572b7a7e4c572), LL(0x39e439d5dd72ec39), LL(0x4c2d4c5a6198164c),
    LL(0x5e655eca3bbc945e), LL(0x78fd78e785f09f78), LL(0x38e038ddd870e538), LL(0x8c0a8c148605988c),
    LL(0xd163d1c6b2bf17d1), LL(0xa5aea5410b57e4a5), LL(0xe2afe2434dd9a1e2), LL(0x6199612ff8c24e61),
    LL(0xb3f6b3f1457b42b3), LL(0x21842115a5423421), LL(0x9c4a9c94d625089c), LL(0x1e781ef0663cee1e),
    LL(0x4311432252866143), LL(0xc73bc776fc93b1c7), LL(0xfcd7fcb32be54ffc), LL(0x0410042014082404),
    LL(0x515951b208a2e351), LL(0x995e99bcc72f2599), LL(0x6da96d4fc4da226d), LL(0x0d340d68391a650d),
    LL(0xfacffa8335e979fa), LL(0xdf5bdfb684a369df), LL(0x7ee57ed79bfca97e), LL(0x2490243db4481924),
    LL(0x3bec3bc5d776fe3b), LL(0xab96ab313d4b9aab), LL(0xce1fce3ed181f0ce), LL(0x1144118855229911),
    LL(0x8f068f0c8903838f), LL(0x4e254e4a6b9c044e), LL(0xb7e6b7d1517366b7), LL(0xeb8beb0b60cbe0eb),
    LL(0x3cf03cfdcc78c13c), LL(0x813e817cbf1ffd81), LL(0x946a94d4fe354094), LL(0xf7fbf7eb0cf31cf7),
    LL(0xb9deb9a1676f18b9), LL(0x134c13985f268b13), LL(0x2cb02c7d9c58512c), LL(0xd36bd3d6b8bb05d3),
    LL(0xe7bbe76b5cd38ce7), LL(0x6ea56e57cbdc396e), LL(0xc437c46ef395aac4), LL(0x030c03180f061b03),
    LL(0x5645568a13acdc56), LL(0x440d441a49885e44), LL(0x7fe17fdf9efea07f), LL(0xa99ea921374f88a9),
    LL(0x2aa82a4d8254672a), LL(0xbbd6bbb16d6b0abb), LL(0xc123c146e29f87c1), LL(0x535153a202a6f153),
    LL(0xdc57dcae8ba572dc), LL(0x0b2c0b582716530b), LL(0x9d4e9d9cd327019d), LL(0x6cad6c47c1d82b6c),
    LL(0x31c43195f562a431), LL(0x74cd7487b9e8f374), LL(0xf6fff6e309f115f6), LL(0x4605460a438c4c46),
    LL(0xac8aac092645a5ac), LL(0x891e893c970fb589), LL(0x145014a04428b414), LL(0xe1a3e15b42dfbae1),
    LL(0x165816b04e2ca616), LL(0x3ae83acdd274f73a), LL(0x69b9696fd0d20669), LL(0x092409482d124109),
    LL(0x70dd70a7ade0d770), LL(0xb6e2b6d954716fb6), LL(0xd067d0ceb7bd1ed0), LL(0xed93ed3b7ec7d6ed),
    LL(0xcc17cc2edb85e2cc), LL(0x4215422a57846842), LL(0x985a98b4c22d2c98), LL(0xa4aaa4490e55eda4),
    LL(0x28a0285d88507528), LL(0x5c6d5cda31b8865c), LL(0xf8c7f8933fed6bf8), LL(0x86228644a411c286),
};
/*
 * Round constants, one slot per index 0 .. WHIRLPOOL_R.
 * processBuffer() XORs rc[r] into word 0 of the round key for
 * r = 1 .. WHIRLPOOL_R; entry 0 is zero and is not read by that loop.
 */
static const u64 rc[WHIRLPOOL_R + 1] = {
    LL(0x0000000000000000),
    LL(0x1823c6e887b8014f),
    LL(0x36a6d2f5796f9152),
    LL(0x60bc9b8ea30c7b35),
    LL(0x1de0d7c22e4bfe57),
    LL(0x157737e59ff04ada),
    LL(0x58c9290ab1a06b85),
    LL(0xbd5d10f4cb3e0567),
    LL(0xe427418ba77d95d8),
    LL(0xfbee7c66dd17479e),
    LL(0xca2dbf07ad5a8333),
};
static void processBuffer(struct NESSIEstruct * const structpointer) {
    int i, r;
    u64 K[8];
    u64 block[8];
    u64 state[8];
    u64 L[8];
    u8 *buffer = structpointer->buffer;
    for (i = 0; i < 8; i++, buffer += 8) {
        block[i] =
            (((u64)buffer[0]        ) << 56) ^
            (((u64)buffer[1] & 0xffL) << 48) ^
            (((u64)buffer[2] & 0xffL) << 40) ^
            (((u64)buffer[3] & 0xffL) << 32) ^
            (((u64)buffer[4] & 0xffL) << 24) ^
            (((u64)buffer[5] & 0xffL) << 16) ^
            (((u64)buffer[6] & 0xffL) <<  8) ^
            (((u64)buffer[7] & 0xffL)      );
    }
    state[0] = block[0] ^ (K[0] = structpointer->hash[0]);
    state[1] = block[1] ^ (K[1] = structpointer->hash[1]);
    state[2] = block[2] ^ (K[2] = structpointer->hash[2]);
    state[3] = block[3] ^ (K[3] = structpointer->hash[3]);
    state[4] = block[4] ^ (K[4] = structpointer->hash[4]);
    state[5] = block[5] ^ (K[5] = structpointer->hash[5]);
    state[6] = block[6] ^ (K[6] = structpointer->hash[6]);
    state[7] = block[7] ^ (K[7] = structpointer->hash[7]);
    for (r = 1; r <= WHIRLPOOL_R; r++) {
        L[0] =
            C0[(int)(K[0] >> 56)       ] ^
            C1[(int)(K[7] >> 48) & 0xff] ^
            C2[(int)(K[6] >> 40) & 0xff] ^
            C3[(int)(K[5] >> 32) & 0xff] ^
            C4[(int)(K[4] >> 24) & 0xff] ^
            C5[(int)(K[3] >> 16) & 0xff] ^
            C6[(int)(K[2] >>  8) & 0xff] ^
            C7[(int)(K[1]      ) & 0xff] ^
            rc[r];
        L[1] =
            C0[(int)(K[1] >> 56)       ] ^
            C1[(int)(K[0] >> 48) & 0xff] ^
            C2[(int)(K[7] >> 40) & 0xff] ^
            C3[(int)(K[6] >> 32) & 0xff] ^
            C4[(int)(K[5] >> 24) & 0xff] ^
            C5[(int)(K[4] >> 16) & 0xff] ^
            C6[(int)(K[3] >>  8) & 0xff] ^
            C7[(int)(K[2]      ) & 0xff];
        L[2] =
            C0[(int)(K[2] >> 56)       ] ^
            C1[(int)(K[1] >> 48) & 0xff] ^
            C2[(int)(K[0] >> 40) & 0xff] ^
            C3[(int)(K[7] >> 32) & 0xff] ^
            C4[(int)(K[6] >> 24) & 0xff] ^
            C5[(int)(K[5] >> 16) & 0xff] ^
            C6[(int)(K[4] >>  8) & 0xff] ^
            C7[(int)(K[3]      ) & 0xff];
        L[3] =
            C0[(int)(K[3] >> 56)       ] ^
            C1[(int)(K[2] >> 48) & 0xff] ^
            C2[(int)(K[1] >> 40) & 0xff] ^
            C3[(int)(K[0] >> 32) & 0xff] ^
            C4[(int)(K[7] >> 24) & 0xff] ^
            C5[(int)(K[6] >> 16) & 0xff] ^
            C6[(int)(K[5] >>  8) & 0xff] ^
            C7[(int)(K[4]      ) & 0xff];
        L[4] =
            C0[(int)(K[4] >> 56)       ] ^
            C1[(int)(K[3] >> 48) & 0xff] ^
            C2[(int)(K[2] >> 40) & 0xff] ^
            C3[(int)(K[1] >> 32) & 0xff] ^
            C4[(int)(K[0] >> 24) & 0xff] ^
            C5[(int)(K[7] >> 16) & 0xff] ^
            C6[(int)(K[6] >>  8) & 0xff] ^
            C7[(int)(K[5]      ) & 0xff];
        L[5] =
            C0[(int)(K[5] >> 56)       ] ^
            C1[(int)(K[4] >> 48) & 0xff] ^
            C2[(int)(K[3] >> 40) & 0xff] ^
            C3[(int)(K[2] >> 32) & 0xff] ^
            C4[(int)(K[1] >> 24) & 0xff] ^
            C5[(int)(K[0] >> 16) & 0xff] ^
            C6[(int)(K[7] >>  8) & 0xff] ^
            C7[(int)(K[6]      ) & 0xff];
        L[6] =
            C0[(int)(K[6] >> 56)       ] ^
            C1[(int)(K[5] >> 48) & 0xff] ^
            C2[(int)(K[4] >> 40) & 0xff] ^
            C3[(int)(K[3] >> 32) & 0xff] ^
            C4[(int)(K[2] >> 24) & 0xff] ^
            C5[(int)(K[1] >> 16) & 0xff] ^
            C6[(int)(K[0] >>  8) & 0xff] ^
            C7[(int)(K[7]      ) & 0xff];
        L[7] =
            C0[(int)(K[7] >> 56)       ] ^
            C1[(int)(K[6] >> 48) & 0xff] ^
            C2[(int)(K[5] >> 40) & 0xff] ^
            C3[(int)(K[4] >> 32) & 0xff] ^
            C4[(int)(K[3] >> 24) & 0xff] ^
            C5[(int)(K[2] >> 16) & 0xff] ^
            C6[(int)(K[1] >>  8) & 0xff] ^
            C7[(int)(K[0]      ) & 0xff];
        K[0] = L[0];
        K[1] = L[1];
        K[2] = L[2];
        K[3] = L[3];
        K[4] = L[4];
        K[5] = L[5];
        K[6] = L[6];
        K[7] = L[7];
        L[0] =
            C0[(int)(state[0] >> 56)       ] ^
            C1[(int)(state[7] >> 48) & 0xff] ^
            C2[(int)(state[6] >> 40) & 0xff] ^
            C3[(int)(state[5] >> 32) & 0xff] ^
            C4[(int)(state[4] >> 24) & 0xff] ^
            C5[(int)(state[3] >> 16) & 0xff] ^
            C6[(int)(state[2] >>  8) & 0xff] ^
            C7[(int)(state[1]      ) & 0xff] ^
            K[0];
        L[1] =
            C0[(int)(state[1] >> 56)       ] ^
            C1[(int)(state[0] >> 48) & 0xff] ^
            C2[(int)(state[7] >> 40) & 0xff] ^
            C3[(int)(state[6] >> 32) & 0xff] ^
            C4[(int)(state[5] >> 24) & 0xff] ^
            C5[(int)(state[4] >> 16) & 0xff] ^
            C6[(int)(state[3] >>  8) & 0xff] ^
            C7[(int)(state[2]      ) & 0xff] ^
            K[1];
        L[2] =
            C0[(int)(state[2] >> 56)       ] ^
            C1[(int)(state[1] >> 48) & 0xff] ^
            C2[(int)(state[0] >> 40) & 0xff] ^
            C3[(int)(state[7] >> 32) & 0xff] ^
            C4[(int)(state[6] >> 24) & 0xff] ^
            C5[(int)(state[5] >> 16) & 0xff] ^
            C6[(int)(state[4] >>  8) & 0xff] ^
            C7[(int)(state[3]      ) & 0xff] ^
            K[2];
        L[3] =
            C0[(int)(state[3] >> 56)       ] ^
            C1[(int)(state[2] >> 48) & 0xff] ^
            C2[(int)(state[1] >> 40) & 0xff] ^
            C3[(int)(state[0] >> 32) & 0xff] ^
            C4[(int)(state[7] >> 24) & 0xff] ^
            C5[(int)(state[6] >> 16) & 0xff] ^
            C6[(int)(state[5] >>  8) & 0xff] ^
            C7[(int)(state[4]      ) & 0xff] ^
            K[3];
        L[4] =
            C0[(int)(state[4] >> 56)       ] ^
            C1[(int)(state[3] >> 48) & 0xff] ^
            C2[(int)(state[2] >> 40) & 0xff] ^
            C3[(int)(state[1] >> 32) & 0xff] ^
            C4[(int)(state[0] >> 24) & 0xff] ^
            C5[(int)(state[7] >> 16) & 0xff] ^
            C6[(int)(state[6] >>  8) & 0xff] ^
            C7[(int)(state[5]      ) & 0xff] ^
            K[4];
        L[5] =
            C0[(int)(state[5] >> 56)       ] ^
            C1[(int)(state[4] >> 48) & 0xff] ^
            C2[(int)(state[3] >> 40) & 0xff] ^
            C3[(int)(state[2] >> 32) & 0xff] ^
            C4[(int)(state[1] >> 24) & 0xff] ^
            C5[(int)(state[0] >> 16) & 0xff] ^
            C6[(int)(state[7] >>  8) & 0xff] ^
            C7[(int)(state[6]      ) & 0xff] ^
            K[5];
        L[6] =
            C0[(int)(state[6] >> 56)       ] ^
            C1[(int)(state[5] >> 48) & 0xff] ^
            C2[(int)(state[4] >> 40) & 0xff] ^
            C3[(int)(state[3] >> 32) & 0xff] ^
            C4[(int)(state[2] >> 24) & 0xff] ^
            C5[(int)(state[1] >> 16) & 0xff] ^
            C6[(int)(state[0] >>  8) & 0xff] ^
            C7[(int)(state[7]      ) & 0xff] ^
            K[6];
        L[7] =
            C0[(int)(state[7] >> 56)       ] ^
            C1[(int)(state[6] >> 48) & 0xff] ^
            C2[(int)(state[5] >> 40) & 0xff] ^
            C3[(int)(state[4] >> 32) & 0xff] ^
            C4[(int)(state[3] >> 24) & 0xff] ^
            C5[(int)(state[2] >> 16) & 0xff] ^
            C6[(int)(state[1] >>  8) & 0xff] ^
            C7[(int)(state[0]      ) & 0xff] ^
            K[7];
        state[0] = L[0];
        state[1] = L[1];
        state[2] = L[2];
        state[3] = L[3];
        state[4] = L[4];
        state[5] = L[5];
        state[6] = L[6];
        state[7] = L[7];
    }
    structpointer->hash[0] ^= state[0] ^ block[0];
    structpointer->hash[1] ^= state[1] ^ block[1];
    structpointer->hash[2] ^= state[2] ^ block[2];
    structpointer->hash[3] ^= state[3] ^ block[3];
    structpointer->hash[4] ^= state[4] ^ block[4];
    structpointer->hash[5] ^= state[5] ^ block[5];
    structpointer->hash[6] ^= state[6] ^ block[6];
    structpointer->hash[7] ^= state[7] ^ block[7];
}
/*
 * Reset a hashing context: clears the 32-byte bit-length counter, the
 * buffer bookkeeping (bufferBits, bufferPos), the first buffer byte and
 * the eight hash words.
 */
void NESSIEinit(struct NESSIEstruct * const structpointer) {
    int word = 8;

    memset(structpointer->bitLength, 0, 32);
    structpointer->bufferPos  = 0;
    structpointer->bufferBits = 0;
    /* Only the byte at bufferPos is cleared here; the rest of the buffer is left as is. */
    structpointer->buffer[0] = 0;
    while (word-- > 0) {
        structpointer->hash[word] = 0L;
    }
}
/*
 * Feed data into the hash.
 *
 * source        input bytes; when sourceBits is not a multiple of 8 the top
 *               (8 - sourceBits % 8) bits of source[0] are skipped
 * sourceBits    number of data bits to absorb (bits, not bytes)
 * structpointer hashing context previously set up with NESSIEinit()
 *
 * The bit count is added to the 32-byte big-endian counter bitLength, then
 * the data is appended to the context buffer one byte at a time;
 * processBuffer() is called whenever bufferBits reaches DIGESTBITS.
 */
void NESSIEadd(const unsigned char * const source,
               unsigned long sourceBits,
               struct NESSIEstruct * const structpointer) {
    /* size_t, not int: this index advances once per input byte, so an int
     * would overflow (undefined behaviour, then a negative index) on inputs
     * of 2 GiB or more where unsigned long is 64 bits wide. */
    size_t sourcePos = 0;
    /* Unused high bits of the first source byte. */
    int sourceGap    = (8 - ((int)sourceBits & 7)) & 7;
    /* Bits already occupied in buffer[bufferPos]. */
    int bufferRem    = structpointer->bufferBits & 7;
    int i;
    u32 b, carry;
    u8 *buffer       = structpointer->buffer;
    u8 *bitLength    = structpointer->bitLength;
    int bufferBits   = structpointer->bufferBits;
    int bufferPos    = structpointer->bufferPos;
    u64 value = sourceBits;

    /* Add sourceBits to the running message length (byte 31 is least significant). */
    for (i = 31, carry = 0; i >= 0 && (carry != 0 || value != LL(0)); i--) {
        carry += bitLength[i] + ((u32)value & 0xff);
        bitLength[i] = (u8)carry;
        carry >>= 8;
        value >>= 8;
    }

    /* Whole bytes: while more than 8 bits remain, source[sourcePos + 1] also holds data. */
    while (sourceBits > 8) {
        b = ((source[sourcePos] << sourceGap) & 0xff) |
            ((source[sourcePos + 1] & 0xff) >> (8 - sourceGap));
        buffer[bufferPos++] |= (u8)(b >> bufferRem);
        bufferBits += 8 - bufferRem;
        if (bufferBits == DIGESTBITS) {
            processBuffer(structpointer);
            bufferBits = bufferPos = 0;
        }
        /* Low bufferRem bits of b spill into the next buffer byte. */
        buffer[bufferPos] = b << (8 - bufferRem);
        bufferBits += bufferRem;
        sourceBits -= 8;
        sourcePos++;
    }

    /* Here 0 <= sourceBits <= 8 and any remaining data is in source[sourcePos]. */
    if (sourceBits > 0) {
        b = (source[sourcePos] << sourceGap) & 0xff;
        buffer[bufferPos] |= b >> bufferRem;
    } else {
        b = 0;
    }
    if (bufferRem + sourceBits < 8) {
        /* Remaining bits fit in the current buffer byte. */
        bufferBits += sourceBits;
    } else {
        bufferPos++;
        bufferBits += 8 - bufferRem;
        sourceBits -= 8 - bufferRem;
        if (bufferBits == DIGESTBITS) {
            processBuffer(structpointer);
            bufferBits = bufferPos = 0;
        }
        buffer[bufferPos] = b << (8 - bufferRem);
        bufferBits += (int)sourceBits;
    }
    structpointer->bufferBits   = bufferBits;
    structpointer->bufferPos    = bufferPos;
}
/*
 * Feed data into the hash; same as NESSIEadd() but with a bit count that is
 * 64 bits wide on every platform.
 *
 * source        input bytes; when sourceBits is not a multiple of 8 the top
 *               (8 - sourceBits % 8) bits of source[0] are skipped
 * sourceBits    number of data bits to absorb (bits, not bytes)
 * structpointer hashing context previously set up with NESSIEinit()
 *
 * The bit count is added to the 32-byte big-endian counter bitLength, then
 * the data is appended to the context buffer one byte at a time;
 * processBuffer() is called whenever bufferBits reaches DIGESTBITS.
 */
void NESSIEadd64(const unsigned char * const source,
               uint64_t sourceBits,
               struct NESSIEstruct * const structpointer) {
    /* size_t, not int: this index advances once per input byte, so an int
     * would overflow (undefined behaviour, then a negative index) on inputs
     * of 2 GiB or more, which a 64-bit bit count allows. */
    size_t sourcePos = 0;
    /* Unused high bits of the first source byte. */
    int sourceGap    = (8 - ((int)sourceBits & 7)) & 7;
    /* Bits already occupied in buffer[bufferPos]. */
    int bufferRem    = structpointer->bufferBits & 7;
    int i;
    u32 b, carry;
    u8 *buffer       = structpointer->buffer;
    u8 *bitLength    = structpointer->bitLength;
    int bufferBits   = structpointer->bufferBits;
    int bufferPos    = structpointer->bufferPos;
    u64 value = sourceBits;

    /* Add sourceBits to the running message length (byte 31 is least significant). */
    for (i = 31, carry = 0; i >= 0 && (carry != 0 || value != LL(0)); i--) {
        carry += bitLength[i] + ((u32)value & 0xff);
        bitLength[i] = (u8)carry;
        carry >>= 8;
        value >>= 8;
    }

    /* Whole bytes: while more than 8 bits remain, source[sourcePos + 1] also holds data. */
    while (sourceBits > 8) {
        b = ((source[sourcePos] << sourceGap) & 0xff) |
            ((source[sourcePos + 1] & 0xff) >> (8 - sourceGap));
        buffer[bufferPos++] |= (u8)(b >> bufferRem);
        bufferBits += 8 - bufferRem;
        if (bufferBits == DIGESTBITS) {
            processBuffer(structpointer);
            bufferBits = bufferPos = 0;
        }
        /* Low bufferRem bits of b spill into the next buffer byte. */
        buffer[bufferPos] = b << (8 - bufferRem);
        bufferBits += bufferRem;
        sourceBits -= 8;
        sourcePos++;
    }

    /* Here 0 <= sourceBits <= 8 and any remaining data is in source[sourcePos]. */
    if (sourceBits > 0) {
        b = (source[sourcePos] << sourceGap) & 0xff;
        buffer[bufferPos] |= b >> bufferRem;
    } else {
        b = 0;
    }
    if (bufferRem + sourceBits < 8) {
        /* Remaining bits fit in the current buffer byte. */
        bufferBits += sourceBits;
    } else {
        bufferPos++;
        bufferBits += 8 - bufferRem;
        sourceBits -= 8 - bufferRem;
        if (bufferBits == DIGESTBITS) {
            processBuffer(structpointer);
            bufferBits = bufferPos = 0;
        }
        buffer[bufferPos] = b << (8 - bufferRem);
        bufferBits += (int)sourceBits;
    }
    structpointer->bufferBits   = bufferBits;
    structpointer->bufferPos    = bufferPos;
}
/*
 * Finish the hash and write the digest.
 *
 * structpointer hashing context that has received all the data
 * result        output buffer; DIGESTBYTES bytes are written to it
 *
 * A single 1 bit is appended after the buffered data, the block is
 * zero-padded up to the last LENGTHBYTES bytes, the bit-length counter is
 * copied into those bytes and the block is processed. The hash words are
 * then stored to result most-significant byte first.
 */
void NESSIEfinalize(struct NESSIEstruct * const structpointer,
                    unsigned char * const result) {
    u8 *buffer           = structpointer->buffer;
    const u8 *bitLength  = structpointer->bitLength;
    const int bufferBits = structpointer->bufferBits;
    int bufferPos        = structpointer->bufferPos;
    u8 *out              = result;
    int word, shift;

    /* Append the terminating 1 bit right after the last data bit. */
    buffer[bufferPos] |= 0x80U >> (bufferBits & 7);
    bufferPos += 1;

    /* Not enough room left for the length field: flush a padded block first. */
    if (bufferPos > WBLOCKBYTES - LENGTHBYTES) {
        if (bufferPos < WBLOCKBYTES) {
            memset(buffer + bufferPos, 0, WBLOCKBYTES - bufferPos);
        }
        processBuffer(structpointer);
        bufferPos = 0;
    }

    /* Zero-fill up to the length field, then append the bit length. */
    if (bufferPos < WBLOCKBYTES - LENGTHBYTES) {
        memset(buffer + bufferPos, 0, (WBLOCKBYTES - LENGTHBYTES) - bufferPos);
    }
    bufferPos = WBLOCKBYTES - LENGTHBYTES;
    memcpy(buffer + (WBLOCKBYTES - LENGTHBYTES), bitLength, LENGTHBYTES);
    processBuffer(structpointer);

    /* Emit each hash word big-endian. */
    for (word = 0; word < DIGESTBYTES/8; word++) {
        for (shift = 56; shift >= 0; shift -= 8) {
            *out++ = (u8)(structpointer->hash[word] >> shift);
        }
    }

    structpointer->bufferBits   = bufferBits;
    structpointer->bufferPos    = bufferPos;
}
/// LICENSE_END.16



/// LICENSE_START.13
// This is free and unencumbered software released into the public domain under The Unlicense (http://unlicense.org/)
// main repo: https://github.com/wangyi-fudan/wyhash
// author: 王一 Wang Yi <godspeed_china@yeah.net>
// contributors: Reini Urban, Dietrich Epp, Joshua Haberman, Tommy Ettinger, Daniel Lemire, Otmar Ertl, cocowalla, leo-yuriev, Diego Barrios Romero, paulie-g, dumblob, Yann Collet, ivte-ms, hyb, James Z.M. Gao, easyaspi314 (Devin), TheOneric
/* quick example:
   string s="fjsakfdsjkf";
   uint64_t hash=wyhash(s.c_str(), s.size(), 0, _wyp);

   A bit reworked
*/
//branch hints: forwarded to __builtin_expect on GCC-compatible compilers,
//plain pass-through of the expression everywhere else
#if defined(__clang__) || defined(__INTEL_COMPILER) || defined(__GNUC__)
#  define _likely_(x)  __builtin_expect(x,1)
#  define _unlikely_(x)  __builtin_expect(x,0)
#else
#  define _likely_(x) (x)
#  define _unlikely_(x) (x)
#endif
//128bit multiply function
///static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); }
//full 64x64 -> 128 bit multiply of *A and *B:
//on return *A holds the low 64 bits of the product and *B the high 64 bits
static inline void _wymum(uint64_t *A, uint64_t *B){
#if defined(__SIZEOF_INT128__)
  //native 128-bit integer available
  const __uint128_t product = (__uint128_t)*A * *B;
  *A = (uint64_t)product;
  *B = (uint64_t)(product >> 64);
#elif defined(_MSC_VER) && defined(_M_X64)
  //MSVC x64 intrinsic: returns the low half, stores the high half through B
  *A = _umul128(*A, *B, B);
#else
  //portable fallback: schoolbook multiplication on 32-bit halves
  const uint64_t a_hi = *A >> 32;
  const uint64_t b_hi = *B >> 32;
  const uint64_t a_lo = (uint32_t)*A;
  const uint64_t b_lo = (uint32_t)*B;
  const uint64_t hi_hi = a_hi * b_hi;
  const uint64_t cross0 = a_hi * b_lo;
  const uint64_t cross1 = b_hi * a_lo;
  const uint64_t lo_lo = a_lo * b_lo;
  const uint64_t partial = lo_lo + (cross0 << 32);
  uint64_t carry = partial < lo_lo;
  const uint64_t low = partial + (cross1 << 32);
  uint64_t high;
  carry += low < partial;
  high = hi_hi + (cross0 >> 32) + (cross1 >> 32) + carry;
  *A = low;
  *B = high;
#endif
}
//MUM mix: 128-bit product of A and B, folded to 64 bits by xoring its two halves
static inline uint64_t _wymix(uint64_t A, uint64_t B){
  _wymum(&A, &B);
  return A ^ B;
}
//endian selection: WYHASH_LITTLE_ENDIAN is 1 unless BIG is defined
#ifndef BIG
    #define WYHASH_LITTLE_ENDIAN 1
#else
    #define WYHASH_LITTLE_ENDIAN 0
#endif
//read functions: _wyr8 loads 8 bytes and _wyr4 loads 4 bytes from p (no alignment
//requirement, the bytes are copied with memcpy). When WYHASH_LITTLE_ENDIAN is 0
//the loaded value is byte-swapped before being returned.
#if (WYHASH_LITTLE_ENDIAN)
static inline uint64_t _wyr8(const uint8_t *p) {
  uint64_t raw;
  memcpy(&raw, p, 8);
  return raw;
}
static inline uint64_t _wyr4(const uint8_t *p) {
  uint32_t raw;
  memcpy(&raw, p, 4);
  return raw;
}
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
static inline uint64_t _wyr8(const uint8_t *p)
{
  uint64_t raw;
  memcpy(&raw, p, 8);
#ifdef ANCIENT
  //hand-written swap for compilers without __builtin_bswap64
  return  (raw >> 56) |
         ((raw >> 40) & 0x000000000000FF00ULL) |
         ((raw >> 24) & 0x0000000000FF0000ULL) |
         ((raw >>  8) & 0x00000000FF000000ULL) |
         ((raw <<  8) & 0x000000FF00000000ULL) |
         ((raw << 24) & 0x0000FF0000000000ULL) |
         ((raw << 40) & 0x00FF000000000000ULL) |
          (raw << 56);
#else
  return __builtin_bswap64(raw);
#endif
}
static inline uint64_t _wyr4(const uint8_t *p)
{
  uint32_t raw;
  memcpy(&raw, p, 4);
#ifdef ANCIENT
  //hand-written swap for compilers without __builtin_bswap32
  return (raw >> 24) |
        ((raw >>  8) & 0x0000FF00) |
        ((raw <<  8) & 0x00FF0000) |
         (raw << 24);
#else
  return __builtin_bswap32(raw);
#endif
}
#elif defined(_MSC_VER)
static inline uint64_t _wyr8(const uint8_t *p) {
  uint64_t raw;
  memcpy(&raw, p, 8);
  return _byteswap_uint64(raw);
}
static inline uint64_t _wyr4(const uint8_t *p) {
  uint32_t raw;
  memcpy(&raw, p, 4);
  return _byteswap_ulong(raw);
}
#else
//generic byte swap with shifts and masks
static inline uint64_t _wyr8(const uint8_t *p) {
  uint64_t raw;
  memcpy(&raw, p, 8);
  return (((raw >> 56) & 0xff) |
          ((raw >> 40) & 0xff00) |
          ((raw >> 24) & 0xff0000) |
          ((raw >>  8) & 0xff000000) |
          ((raw <<  8) & 0xff00000000) |
          ((raw << 24) & 0xff0000000000) |
          ((raw << 40) & 0xff000000000000) |
          ((raw << 56) & 0xff00000000000000));
}
static inline uint64_t _wyr4(const uint8_t *p) {
  uint32_t raw;
  memcpy(&raw, p, 4);
  return (((raw >> 24) & 0xff) |
          ((raw >>  8) & 0xff00) |
          ((raw <<  8) & 0xff0000) |
          ((raw << 24) & 0xff000000));
}
#endif
//pack the first, middle (index k>>1) and last (index k-1) bytes of p into a 24-bit value
static inline uint64_t _wyr3(const uint8_t *p, size_t k) {
  const uint64_t first  = p[0];
  const uint64_t middle = p[k >> 1];
  const uint64_t last   = p[k - 1];
  return (first << 16) | (middle << 8) | last;
}
//wyhash main function
//  key    : bytes to hash
//  len    : number of bytes at key
//  seed   : caller-chosen seed, xored with secret[0] before use
//  secret : four 64-bit secret words (secret[0..3] are read)
//returns the 64-bit hash of key[0..len)
static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){
  const uint8_t *cursor = (const uint8_t *)key;
  uint64_t a, b;
  seed ^= *secret;
  if(_likely_(len<=16)){
    //short input: a and b are built from (possibly overlapping) 4-byte reads
    if(_likely_(len>=4)){
      const size_t step = (len>>3)<<2;        //0 for len 4..7, 4 for len 8..15, 8 for len 16
      const uint8_t *tail = cursor + len - 4; //last four bytes of the key
      a = (_wyr4(cursor)<<32) | _wyr4(cursor + step);
      b = (_wyr4(tail)<<32)   | _wyr4(tail - step);
    }
    else if(_likely_(len>0)){
      a = _wyr3(cursor, len);
      b = 0;
    }
    else{
      a = 0;
      b = 0;
    }
  }
  else{
    size_t remaining = len;
    if(_unlikely_(remaining>48)){
      //three independent lanes, 48 bytes per iteration
      uint64_t lane1 = seed, lane2 = seed;
      do{
        seed  = _wymix(_wyr8(cursor)^secret[1],    _wyr8(cursor+8)^seed);
        lane1 = _wymix(_wyr8(cursor+16)^secret[2], _wyr8(cursor+24)^lane1);
        lane2 = _wymix(_wyr8(cursor+32)^secret[3], _wyr8(cursor+40)^lane2);
        cursor += 48;
        remaining -= 48;
      }while(_likely_(remaining>48));
      seed ^= lane1^lane2;
    }
    while(_unlikely_(remaining>16)){
      seed = _wymix(_wyr8(cursor)^secret[1], _wyr8(cursor+8)^seed);
      remaining -= 16;
      cursor += 16;
    }
    //last 16 bytes of the key (may overlap bytes already consumed above)
    a = _wyr8(cursor + remaining - 16);
    b = _wyr8(cursor + remaining - 8);
  }
  return _wymix(secret[1]^len, _wymix(a^secret[1], b^seed));
}
//the default secret parameters
///static const uint64_t _wyp[4] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull};
//The wyrand PRNG (upstream states that it passes BigCrush and PractRand):
//advances *seed by a fixed odd constant and returns a mix of the new seed value
static inline uint64_t wyrand(uint64_t *seed){
  const uint64_t advanced = *seed + 0xa0761d6478bd642full;
  *seed = advanced;
  return _wymix(advanced, advanced ^ 0xe7037ed1a0b428dbull);
}
//make your own secret: fills secret[0..3] from seed using wyrand.
//Each word is assembled from eight bytes picked out of the table c; a candidate
//is retried until it is odd and differs from every previously accepted word
//in exactly 32 bit positions.
static inline void make_secret(uint64_t seed, uint64_t *secret){
  uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
  size_t slot = 0;
  while(slot<4){
    uint64_t candidate = 0;
    uint8_t accepted = 1;
    //draw eight table bytes, least significant byte first
    for(size_t shift=0; shift<64; shift+=8)
      candidate |= ((uint64_t)c[wyrand(&seed)%sizeof(c)])<<shift;
    secret[slot] = candidate;
    if(candidate%2==0) continue;   //even word rejected: draw again for the same slot
    for(size_t prev=0; prev<slot; prev++){
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
      if(__builtin_popcountll(secret[prev]^secret[slot])!=32){ accepted=0; break; }
#elif defined(_MSC_VER) && defined(_M_X64)
      if(_mm_popcnt_u64(secret[prev]^secret[slot])!=32){ accepted=0; break; }
#else
      //manual popcount
      uint64_t x = secret[prev]^secret[slot];
      x -= (x >> 1) & 0x5555555555555555;
      x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
      x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
      x = (x * 0x0101010101010101) >> 56;
      if(x!=32){ accepted=0; break; }
#endif
    }
    if(accepted) slot++;
  }
}
/// LICENSE_END.13




/// LICENSE_START.14

/// I made a few fixes to this incredibly (perhaps even too) complex implementation
/*
 * xxHash - Extremely Fast Hash algorithm
 * Header File
 * Copyright (C) 2012-2020 Yann Collet
 *
 * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
 */

/*
 * Private namespacing: XXH_IPREF pastes the XXH_INLINE_ prefix onto its argument,
 * and the defines below rename the listed xxHash type and enum identifiers with it.
 */
#  define XXH_NAMESPACE XXH_INLINE_
#  define XXH_IPREF(Id)   XXH_INLINE_ ## Id
#  define XXH_OK XXH_IPREF(XXH_OK)
#  define XXH_ERROR XXH_IPREF(XXH_ERROR)
#  define XXH_errorcode XXH_IPREF(XXH_errorcode)
#  define XXH64_state_s XXH_IPREF(XXH64_state_s)
#  define XXH64_state_t XXH_IPREF(XXH64_state_t)
#  define XXH3_state_s  XXH_IPREF(XXH3_state_s)
#  define XXH3_state_t  XXH_IPREF(XXH3_state_t)
#  define XXH128_hash_t XXH_IPREF(XXH128_hash_t)


/* ****************************************************************
 *  Stable API
 *****************************************************************/


/* Two-level paste so that XXH_NAMESPACE is expanded before concatenation. */
#  define XXH_CAT(A,B) A##B
#  define XXH_NAME2(A,B) XXH_CAT(A,B)
/* XXH64 streaming functions, renamed with the namespace prefix */
#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
/* XXH3 128-bit functions, renamed with the namespace prefix */
#  define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
#  define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
#  define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
#  define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
#  define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)


/* ****************************
*  Definitions
******************************/
/* Status code returned by the reset/update functions. */
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
/* Fixed-width aliases used throughout the xxHash code. */
typedef uint32_t XXH32_hash_t;
typedef uint64_t XXH64_hash_t;
typedef uint8_t xxh_u8;
typedef XXH32_hash_t xxh_u32;

/* One-shot XXH64 (declaration only; no definition appears in this part of the file). */
XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed);

/*******   Streaming   *******/
/* Usage order: XXH64_createState -> XXH64_reset -> XXH64_update (repeatable) -> XXH64_digest. */
typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
XXH64_state_t* XXH64_createState(void);

XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);

/*******   XXH3 / 128-bit declarations   *******/

/* Secret size constant in bytes (its use is outside this part of the file). */
#define XXH3_SECRET_SIZE_MIN 136

typedef struct XXH3_state_s XXH3_state_t;

/* 128-bit hash result, split into two 64-bit halves. */
typedef struct {
 XXH64_hash_t low64;
 XXH64_hash_t high64;
} XXH128_hash_t;

/* Declarations only; the definitions are not in this part of the file. */
XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);

/* Streaming state for XXH64 (initialised by XXH64_reset, advanced by XXH64_update). */
struct XXH64_state_s {
   XXH64_hash_t total_len;   /* total number of bytes fed so far */
   XXH64_hash_t v1;          /* four running accumulators, one per 8-byte lane */
   XXH64_hash_t v2;
   XXH64_hash_t v3;          /* holds the plain seed until the first 32-byte block is consumed */
   XXH64_hash_t v4;
   XXH64_hash_t mem64[4];    /* 32-byte buffer for input not yet forming a full block */
   XXH32_hash_t memsize;     /* number of valid bytes currently in mem64 */
   XXH32_hash_t reserved32;  /* required for padding anyway */
   XXH64_hash_t reserved64;  /* never read nor write, might be removed in a future version */
};   /* typedef'd to XXH64_state_t */

/*
 * XXH_ALIGN(n): alignment specifier, spelled per compiler.
 * Expands to nothing when no known spelling is available.
 */
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)   /* C11+ */
#  include <stdalign.h>
#  define XXH_ALIGN(n)      alignas(n)
#elif defined(__GNUC__)
#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
#elif defined(_MSC_VER)
#  define XXH_ALIGN(n)      __declspec(align(n))
#else
#  define XXH_ALIGN(n)   /* disabled */
#endif

/* Old GCC versions only accept the attribute after the type in structures. */
/* XXH_ALIGN_MEMBER(align, type): places the specifier after the declaration for
 * pre-C11 GCC, and before it otherwise. */
#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \
    && defined(__GNUC__)
#   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
#else
#   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
#endif

/* Sizes, in bytes, of the streaming input buffer and of the embedded secret. */
#define XXH3_INTERNALBUFFER_SIZE 256
#define XXH3_SECRET_DEFAULT_SIZE 192
/*
 * Streaming state for XXH3. The three arrays are aligned on 64 bytes.
 * NOTE(review): the code that reads and writes these fields is outside this part
 * of the file; the remaining field meanings are taken from their names only.
 */
struct XXH3_state_s {
   XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
   /* used to store a custom secret generated from a seed */
   XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
   XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
   XXH32_hash_t bufferedSize;
   XXH32_hash_t reserved32;
   size_t nbStripesSoFar;
   XXH64_hash_t totalLen;
   size_t nbStripesPerBlock;
   size_t secretLimit;
   XXH64_hash_t seed;
   XXH64_hash_t reserved64;
   const unsigned char* extSecret;  /* reference to external secret;
                                     * if == NULL, use .customSecret instead */
   /* note: there may be some padding at the end due to alignment on 64 bytes */
}; /* typedef'd to XXH3_state_t */

/* XXH_ALIGN_MEMBER is only needed for the struct definition above. */
#undef XXH_ALIGN_MEMBER

/* When the XXH3_state_t structure is merely emplaced on stack,
 * it should be initialized with XXH3_INITSTATE() or a memset()
 * in case its first reset uses XXH3_NNbits_reset_withSeed().
 * This init can be omitted if the first reset uses default or _withSecret mode.
 * This operation isn't necessary when the state is created with XXH3_createState().
 * Note that this doesn't prepare the state for a streaming operation,
 * it's still necessary to use XXH3_NNbits_reset*() afterwards.
 */
/* The macro only zeroes the .seed field of the pointed-to state. */
#define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; }

/* simple short-cut to pre-selected XXH3_128bits variant */
XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);

/*
 * XXH_FORCE_MEMORY_ACCESS selects how unaligned words are loaded (see XXH_read32 /
 * XXH_read64): 2 = direct pointer dereference, 1 = packed-union access,
 * undefined = memcpy. It is only set automatically for the targets listed here.
 */
#  if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
#    define XXH_FORCE_MEMORY_ACCESS 2
#  elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
  (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
#    define XXH_FORCE_MEMORY_ACCESS 1
#  endif

/* When >= 1, XXH64_update returns XXH_OK instead of XXH_ERROR for a NULL input. */
#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
#  define XXH_ACCEPT_NULL_INPUT_POINTER 0
#endif

/* Defaults to 0 on x86 / x86-64 / AArch64 and to 1 elsewhere. */
#ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
#  if defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) \
   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64) /* visual */
#    define XXH_FORCE_ALIGN_CHECK 0
#  else
#    define XXH_FORCE_ALIGN_CHECK 1
#  endif
#endif

/* When 1, XXH_FORCE_INLINE / XXH_NO_INLINE degrade to plain 'static'. */
#ifndef XXH_NO_INLINE_HINTS
#  if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
   || defined(__NO_INLINE__)     /* -O0, -fno-inline */
#    define XXH_NO_INLINE_HINTS 1
#  else
#    define XXH_NO_INLINE_HINTS 0
#  endif
#endif

/* When 1, XXH64_finalize uses its compact loop form instead of the unrolled switch. */
#ifndef XXH_REROLL
#  if defined(__OPTIMIZE_SIZE__)
#    define XXH_REROLL 1
#  else
#    define XXH_REROLL 0
#  endif
#endif


/* *************************************
*  Includes & Memory related functions
***************************************/
/*!
 * Allocation hook for the xxHash code: every request is forwarded to
 * franz_malloc(). Change this function to use a different allocator.
 */
static void* XXH_malloc(size_t s)
{
    void* const block = franz_malloc(s);
    return block;
}
/*! Copy hook for the xxHash code: forwards to memcpy() and returns dest. */
static void* XXH_memcpy(void* dest, const void* src, size_t size)
{
    void* const written = memcpy(dest, src, size);
    return written;
}



/* *************************************
*  Compiler Specific Options
***************************************/
#ifdef _MSC_VER /* Visual Studio warning fix */
#  pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif

/*
 * XXH_FORCE_INLINE / XXH_NO_INLINE: storage class plus inlining hint, spelled per
 * compiler. Both always include 'static'.
 */
#if XXH_NO_INLINE_HINTS  /* disable inlining hints */
#  if defined(__GNUC__)
#    define XXH_FORCE_INLINE static __attribute__((unused))
#  else
#    define XXH_FORCE_INLINE static
#  endif
#  define XXH_NO_INLINE static
/* enable inlining hints */
#elif defined(_MSC_VER)  /* Visual Studio */
#  define XXH_FORCE_INLINE static __forceinline
#  define XXH_NO_INLINE static __declspec(noinline)
#elif defined(__GNUC__)
#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
#  define XXH_NO_INLINE static __attribute__((noinline))
#elif defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* C99 */
#  define XXH_FORCE_INLINE static inline
#  define XXH_NO_INLINE static
#else
#  define XXH_FORCE_INLINE static
#  define XXH_NO_INLINE static
#endif



/* XXH_DEBUGLEVEL: 0 disables XXH_ASSERT; >= 1 maps it to assert(). */
#ifndef XXH_DEBUGLEVEL
#  ifdef DEBUGLEVEL /* backwards compat */
#    define XXH_DEBUGLEVEL DEBUGLEVEL
#  else
#    define XXH_DEBUGLEVEL 0
#  endif
#endif

#if (XXH_DEBUGLEVEL>=1)
#  include <assert.h>   /* note: can still be disabled with NDEBUG */
#  define XXH_ASSERT(c)   assert(c)
#else
#  define XXH_ASSERT(c)   ((void)0)
#endif

/* note: use after variable declarations */
/* Compile-time check: a false condition produces a division by zero inside an
 * enum constant, which the compiler rejects. */
#define XXH_STATIC_ASSERT(c)  do { enum { XXH_sa = 1/(int)(!!(c)) }; } while (0)


/* *************************************
*  Basic Types
***************************************/


/* ***   Memory access   *** */

/*
 * XXH_read32: native-endian 32-bit load from a possibly unaligned address.
 * The implementation is selected by XXH_FORCE_MEMORY_ACCESS (2, 1, or the
 * memcpy default). The manual byteshift mode (3) is handled in XXH_readLE32.
 */
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/*
 * Force direct memory access. Only works on CPU which support unaligned memory
 * access in hardware.
 */
static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/*
 * __pack instructions are safer but compiler specific, hence potentially
 * problematic for some compilers.
 *
 * Currently only defined for GCC and ICC.
 */
static xxh_u32 XXH_read32(const void* ptr)
{
    typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
    return ((const xxh_unalign*)ptr)->u32;
}

#else

/*
 * Portable and safe solution. Generally efficient.
 * see: https://stackoverflow.com/a/32095106/646947
 */
static xxh_u32 XXH_read32(const void* memPtr)
{
    xxh_u32 val;
    memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif   /* XXH_FORCE_MEMORY_ACCESS */


/* ***   Endianness   *** */
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;

/* Compile-time choice: the target is treated as big-endian only when the BIG macro
 * is defined (it is not defined in this part of the file). */
#ifdef BIG
#    define XXH_CPU_LITTLE_ENDIAN 0
#else
#    define XXH_CPU_LITTLE_ENDIAN 1
#endif


/* ****************************************
*  Compiler-specific Functions and Macros
******************************************/
/* Used only inside #if, where an undefined __GNUC__ evaluates to 0. */
#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef __has_builtin
#  define XXH_HAS_BUILTIN(x) __has_builtin(x)
#else
#  define XXH_HAS_BUILTIN(x) 0
#endif

/* XXH_rotl32 / XXH_rotl64: rotate left. The generic fallback is a macro that
 * evaluates its arguments more than once. */
#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
                               && XXH_HAS_BUILTIN(__builtin_rotateleft64)
#  define XXH_rotl32 __builtin_rotateleft32
#  define XXH_rotl64 __builtin_rotateleft64
/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
#elif defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif

/* XXH_swap32: reverse the byte order of a 32-bit value. */
#if defined(_MSC_VER)     /* Visual Studio */
#  define XXH_swap32 _byteswap_ulong
#elif XXH_GCC_VERSION >= 403
#  define XXH_swap32 __builtin_bswap32
#else
static xxh_u32 XXH_swap32 (xxh_u32 x)
{
    return  ((x << 24) & 0xff000000 ) |
            ((x <<  8) & 0x00ff0000 ) |
            ((x >>  8) & 0x0000ff00 ) |
            ((x >> 24) & 0x000000ff );
}
#endif


/* ***************************
*  Memory reads
*****************************/
/* Tells the *_align readers whether the pointer may be dereferenced directly. */
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;

/*
 * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
 *
 * This is ideal for older compilers which don't inline memcpy.
 */
/* XXH_readLE32: read 4 bytes as a little-endian 32-bit value from any address. */
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))

XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
{
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
    return bytePtr[0]
         | ((xxh_u32)bytePtr[1] << 8)
         | ((xxh_u32)bytePtr[2] << 16)
         | ((xxh_u32)bytePtr[3] << 24);
}
#else
XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
{
    /* native load, byte-swapped on big-endian builds */
    return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
}
#endif

/*
 * Little-endian 32-bit read with an alignment hint. XXH_unaligned goes through
 * XXH_readLE32; any other value dereferences ptr directly as an xxh_u32, so the
 * caller must guarantee suitable alignment in that case.
 */
XXH_FORCE_INLINE xxh_u32
XXH_readLE32_align(const void* ptr, XXH_alignment align)
{
    if (align != XXH_unaligned) {
        const xxh_u32 raw = *(const xxh_u32*)ptr;
        return XXH_CPU_LITTLE_ENDIAN ? raw : XXH_swap32(raw);
    }
    return XXH_readLE32(ptr);
}


/* *************************************
*  Misc
***************************************/


/* *******************************************************************
*  32-bit constants and helpers
*  (no 32-bit hash function is defined in this part of the file)
*********************************************************************/
static const xxh_u32 XXH_PRIME32_1 = 0x9E3779B1U;   /* 0b10011110001101110111100110110001 */
static const xxh_u32 XXH_PRIME32_2 = 0x85EBCA77U;   /* 0b10000101111010111100101001110111 */
static const xxh_u32 XXH_PRIME32_3 = 0xC2B2AE3DU;   /* 0b11000010101100101010111000111101 */


/* Expands to an aligned/unaligned little-endian read; requires a variable named
 * 'align' in scope at the point of use (see XXH64_finalize). */
#define XXH_get32bits(p) XXH_readLE32_align(p, align)
/* Neither macro is defined in this part of the file; these #undefs have no effect here. */
#  undef XXH_PROCESS1
#  undef XXH_PROCESS4


/* *******************************************************************
*  64-bit hash functions
*********************************************************************/

/*******   Memory access   *******/

typedef XXH64_hash_t xxh_u64;

/*
 * XXH_REROLL_XXH64: when 1, XXH64_finalize uses its compact loop form.
 * Defaults to 1 on ILP32 targets, on anything that is not one of the listed
 * 64-bit architectures, or when SIZE_MAX is undefined or narrower than ULLONG_MAX.
 */
#ifndef XXH_REROLL_XXH64
#  if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \
   || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \
     || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \
     || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \
     || defined(__mips64__) || defined(__mips64)) /* mips64 */ \
   || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */
#    define XXH_REROLL_XXH64 1
#  else
#    define XXH_REROLL_XXH64 0
#  endif
#endif /* !defined(XXH_REROLL_XXH64) */

/*
 * XXH_read64: native-endian 64-bit load from a possibly unaligned address.
 * Not defined at all in mode 3, where XXH_readLE64 assembles the value itself.
 */
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
/*
 * Manual byteshift. Best for old compilers which don't inline memcpy.
 * We actually directly use XXH_readLE64 and XXH_readBE64.
 */
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/*
 * __pack instructions are safer, but compiler specific, hence potentially
 * problematic for some compilers.
 *
 * Currently only defined for GCC and ICC.
 */
static xxh_u64 XXH_read64(const void* ptr)
{
    typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
    return ((const xxh_unalign64*)ptr)->u64;
}

#else

/*
 * Portable and safe solution. Generally efficient.
 * see: https://stackoverflow.com/a/32095106/646947
 */
static xxh_u64 XXH_read64(const void* memPtr)
{
    xxh_u64 val;
    memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif   /* XXH_FORCE_MEMORY_ACCESS */

/* XXH_swap64: reverse the byte order of a 64-bit value (intrinsic where available). */
#if defined(_MSC_VER)     /* Visual Studio */
#  define XXH_swap64 _byteswap_uint64
#elif XXH_GCC_VERSION >= 403
#  define XXH_swap64 __builtin_bswap64
#else
static xxh_u64 XXH_swap64 (xxh_u64 x)
{
    return  ((x << 56) & 0xff00000000000000ULL) |
            ((x << 40) & 0x00ff000000000000ULL) |
            ((x << 24) & 0x0000ff0000000000ULL) |
            ((x << 8)  & 0x000000ff00000000ULL) |
            ((x >> 8)  & 0x00000000ff000000ULL) |
            ((x >> 24) & 0x0000000000ff0000ULL) |
            ((x >> 40) & 0x000000000000ff00ULL) |
            ((x >> 56) & 0x00000000000000ffULL);
}
#endif


/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
/* XXH_readLE64: read 8 bytes as a little-endian 64-bit value from any address. */
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))

XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
{
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
    return bytePtr[0]
         | ((xxh_u64)bytePtr[1] << 8)
         | ((xxh_u64)bytePtr[2] << 16)
         | ((xxh_u64)bytePtr[3] << 24)
         | ((xxh_u64)bytePtr[4] << 32)
         | ((xxh_u64)bytePtr[5] << 40)
         | ((xxh_u64)bytePtr[6] << 48)
         | ((xxh_u64)bytePtr[7] << 56);
}
#else
XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
{
    /* native load, byte-swapped on big-endian builds */
    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
}
#endif

/*
 * Little-endian 64-bit read with an alignment hint. XXH_unaligned goes through
 * XXH_readLE64; any other value dereferences ptr directly as an xxh_u64, so the
 * caller must guarantee suitable alignment in that case.
 */
XXH_FORCE_INLINE xxh_u64
XXH_readLE64_align(const void* ptr, XXH_alignment align)
{
    if (align != XXH_unaligned) {
        const xxh_u64 raw = *(const xxh_u64*)ptr;
        return XXH_CPU_LITTLE_ENDIAN ? raw : XXH_swap64(raw);
    }
    return XXH_readLE64(ptr);
}


/*******   xxh64   *******/

/* Multiplicative / additive constants used by the XXH64 round, merge, avalanche
 * and finalize steps below. */
static const xxh_u64 XXH_PRIME64_1 = 0x9E3779B185EBCA87ULL;   /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
static const xxh_u64 XXH_PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;   /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
static const xxh_u64 XXH_PRIME64_3 = 0x165667B19E3779F9ULL;   /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
static const xxh_u64 XXH_PRIME64_4 = 0x85EBCA77C2B2AE63ULL;   /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
static const xxh_u64 XXH_PRIME64_5 = 0x27D4EB2F165667C5ULL;   /* 0b0010011111010100111010110010111100010110010101100110011111000101 */


/* One XXH64 lane step: fold 'input' into accumulator 'acc' (multiply-add,
 * rotate left by 31, multiply) and return the new accumulator. */
static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
{
    xxh_u64 const mixed = acc + input * XXH_PRIME64_2;
    xxh_u64 const rotated = XXH_rotl64(mixed, 31);
    return rotated * XXH_PRIME64_1;
}

/* Merges one lane accumulator 'val' into the combined hash 'acc': the lane is
 * passed through a round starting from 0, XORed in, then scaled and offset. */
static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
{
    xxh_u64 const folded = acc ^ XXH64_round(0, val);
    return folded * XXH_PRIME64_1 + XXH_PRIME64_4;
}

/* Final bit mixing of the hash: xor-shift by 33, multiply, xor-shift by 29,
 * multiply, xor-shift by 32. */
static xxh_u64 XXH64_avalanche(xxh_u64 h64)
{
    xxh_u64 mixed = h64;
    mixed = (mixed ^ (mixed >> 33)) * XXH_PRIME64_2;
    mixed = (mixed ^ (mixed >> 29)) * XXH_PRIME64_3;
    return mixed ^ (mixed >> 32);
}


/* Expands to an aligned/unaligned little-endian read; requires a variable named
 * 'align' in scope at the point of use. */
#define XXH_get64bits(p) XXH_readLE64_align(p, align)

/*
 * Consumes the tail of the input and produces the final hash.
 *   h64   : hash value accumulated so far
 *   ptr   : start of the remaining bytes
 *   len   : only (len & 31) is used, i.e. at most 31 tail bytes are read from ptr
 *   align : whether ptr may be dereferenced as aligned words
 * The tail is consumed as 8-byte words, then at most one 4-byte word, then single
 * bytes; the result is passed through XXH64_avalanche.
 */
static xxh_u64
XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
/* mix in one byte and advance ptr by 1 */
#define XXH_PROCESS1_64 do {                                   \
    h64 ^= (*ptr++) * XXH_PRIME64_5;                           \
    h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;                 \
} while (0)

/* mix in one little-endian 32-bit word and advance ptr by 4 */
#define XXH_PROCESS4_64 do {                                   \
    h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;      \
    ptr += 4;                                              \
    h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;     \
} while (0)

/* mix in one little-endian 64-bit word and advance ptr by 8 */
#define XXH_PROCESS8_64 do {                                   \
    xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
    ptr += 8;                                              \
    h64 ^= k1;                                             \
    h64  = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4;     \
} while (0)

    /* Rerolled version for 32-bit targets is faster and much smaller. */
    /* The condition is a compile-time constant; both forms perform the same steps. */
    if (XXH_REROLL || XXH_REROLL_XXH64) {
        len &= 31;
        while (len >= 8) {
            XXH_PROCESS8_64;
            len -= 8;
        }
        if (len >= 4) {
            XXH_PROCESS4_64;
            len -= 4;
        }
        while (len > 0) {
            XXH_PROCESS1_64;
            --len;
        }
         return  XXH64_avalanche(h64);
    } else {
        /* Unrolled form: each case group enters at the right number of 8-byte
         * steps and falls through to the shared 4-byte / 1-byte steps. */
        switch(len & 31) {
           case 24: XXH_PROCESS8_64;
                         /* fallthrough */
           case 16: XXH_PROCESS8_64;
                         /* fallthrough */
           case  8: XXH_PROCESS8_64;
                    return XXH64_avalanche(h64);

           case 28: XXH_PROCESS8_64;
                         /* fallthrough */
           case 20: XXH_PROCESS8_64;
                         /* fallthrough */
           case 12: XXH_PROCESS8_64;
                         /* fallthrough */
           case  4: XXH_PROCESS4_64;
                    return XXH64_avalanche(h64);

           case 25: XXH_PROCESS8_64;
                         /* fallthrough */
           case 17: XXH_PROCESS8_64;
                         /* fallthrough */
           case  9: XXH_PROCESS8_64;
                    XXH_PROCESS1_64;
                    return XXH64_avalanche(h64);

           case 29: XXH_PROCESS8_64;
                         /* fallthrough */
           case 21: XXH_PROCESS8_64;
                         /* fallthrough */
           case 13: XXH_PROCESS8_64;
                         /* fallthrough */
           case  5: XXH_PROCESS4_64;
                    XXH_PROCESS1_64;
                    return XXH64_avalanche(h64);

           case 26: XXH_PROCESS8_64;
                         /* fallthrough */
           case 18: XXH_PROCESS8_64;
                         /* fallthrough */
           case 10: XXH_PROCESS8_64;
                    XXH_PROCESS1_64;
                    XXH_PROCESS1_64;
                    return XXH64_avalanche(h64);

           case 30: XXH_PROCESS8_64;
                         /* fallthrough */
           case 22: XXH_PROCESS8_64;
                         /* fallthrough */
           case 14: XXH_PROCESS8_64;
                         /* fallthrough */
           case  6: XXH_PROCESS4_64;
                    XXH_PROCESS1_64;
                    XXH_PROCESS1_64;
                    return XXH64_avalanche(h64);

           case 27: XXH_PROCESS8_64;
                         /* fallthrough */
           case 19: XXH_PROCESS8_64;
                         /* fallthrough */
           case 11: XXH_PROCESS8_64;
                    XXH_PROCESS1_64;
                    XXH_PROCESS1_64;
                    XXH_PROCESS1_64;
                    return XXH64_avalanche(h64);

           case 31: XXH_PROCESS8_64;
                         /* fallthrough */
           case 23: XXH_PROCESS8_64;
                         /* fallthrough */
           case 15: XXH_PROCESS8_64;
                         /* fallthrough */
           case  7: XXH_PROCESS4_64;
                         /* fallthrough */
           case  3: XXH_PROCESS1_64;
                         /* fallthrough */
           case  2: XXH_PROCESS1_64;
                         /* fallthrough */
           case  1: XXH_PROCESS1_64;
                         /* fallthrough */
           case  0: return XXH64_avalanche(h64);
        }
    }
    /* impossible to reach */
    XXH_ASSERT(0);
    return 0;  /* unreachable, but some compilers complain without it */
}

/* the step macros are local helpers of XXH64_finalize */
#  undef XXH_PROCESS1_64
#  undef XXH_PROCESS4_64
#  undef XXH_PROCESS8_64

/*******   Hash Streaming   *******/

/*
 * Allocates an XXH64 streaming state through XXH_malloc and returns it.
 * The state's size is added to the global g_allocatedram counter before the
 * allocation is attempted. The state is not initialised here; see XXH64_reset.
 */
XXH64_state_t* XXH64_createState(void)
{
    size_t const stateBytes = sizeof(XXH64_state_t);
    g_allocatedram += stateBytes;
    return (XXH64_state_t*)XXH_malloc(stateBytes);
}

/*
 * (Re)initialises a streaming state for a new hash with the given seed.
 * A zeroed local copy is prepared and then copied over *statePtr, leaving the
 * trailing reserved64 field of the destination untouched. Always returns XXH_OK.
 */
XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
{
    XXH64_state_t fresh;   /* built locally, then memcpy'd, to avoid strict-aliasing warnings */
    /* everything except the last field (reserved64) is copied */
    size_t const copyBytes = sizeof(fresh) - sizeof(fresh.reserved64);

    memset(&fresh, 0, sizeof(fresh));
    fresh.v4 = seed - XXH_PRIME64_1;
    fresh.v3 = seed + 0;
    fresh.v2 = seed + XXH_PRIME64_2;
    fresh.v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;

    memcpy(statePtr, &fresh, copyBytes);
    return XXH_OK;
}

/*
 * Feeds 'len' bytes at 'input' into the streaming state.
 * Returns XXH_OK on success. A NULL input returns XXH_ERROR, or XXH_OK when
 * XXH_ACCEPT_NULL_INPUT_POINTER >= 1; in both cases the state is left unchanged.
 * Input is consumed in 32-byte blocks (four 8-byte lanes); any remainder is kept
 * in state->mem64 for the next call or for XXH64_digest.
 */
XXH_errorcode
XXH64_update (XXH64_state_t* state, const void* input, size_t len)
{
    if (input==NULL)
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
        return XXH_OK;
#else
        return XXH_ERROR;
#endif

    {   const xxh_u8* p = (const xxh_u8*)input;
        const xxh_u8* const bEnd = p + len;

        state->total_len += len;

        /* not enough for a full block even with the buffered bytes: just append */
        if (state->memsize + len < 32) {  /* fill in tmp buffer */
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
            state->memsize += (xxh_u32)len;
            return XXH_OK;
        }

        /* complete the partially filled buffer from the new input and consume it */
        if (state->memsize) {   /* tmp buffer is full */
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
            state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
            state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
            state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
            state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
            p += 32-state->memsize;
            state->memsize = 0;
        }

        /* consume whole 32-byte blocks straight from the input, using local
         * copies of the accumulators inside the loop */
        if (p+32 <= bEnd) {
            const xxh_u8* const limit = bEnd - 32;
            xxh_u64 v1 = state->v1;
            xxh_u64 v2 = state->v2;
            xxh_u64 v3 = state->v3;
            xxh_u64 v4 = state->v4;

            do {
                v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
                v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
                v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
                v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
            } while (p<=limit);

            state->v1 = v1;
            state->v2 = v2;
            state->v3 = v3;
            state->v4 = v4;
        }

        /* stash the leftover (fewer than 32 bytes) for later */
        if (p < bEnd) {
            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
            state->memsize = (unsigned)(bEnd-p);
        }
    }

    return XXH_OK;
}

XXH64_hash_t XXH64_digest (const XXH64_state_t* state)
{
    xxh_u64 h64;

    if (state->total_len >= 32) {
        xxh_u64 const v1 = state->v1;
        xxh_u64 const v2 = state->v2;
        xxh_u64 const v3 = state->v3;
        xxh_u64 const v4 = state->v4;

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
        h64 = XXH64_mergeRound(h64, v1);
        h64 = XXH64_mergeRound(h64, v2);
        h64 = XXH64_mergeRound(h64, v3);
        h64 = XXH64_mergeRound(h64, v4);
    } else {
        h64  = state->v3 /*seed*/ + XXH_PRIME64_5;
    }

    h64 += (xxh_u64) state->total_len;

    return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
}


/******* Canonical representation   *******/


/* *********************************************************************
*  XXH3
*  New generation hash designed for speed on small keys and vectorization
************************************************************************ */

/* ===   Compiler specifics   === */

/* XXH_RESTRICT: 'restrict' on C99+ builds, except when SOLARIS is defined; empty otherwise. */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L  && (!defined(SOLARIS))  /* franzfix >= C99 */
#  define XXH_RESTRICT   restrict
#else
/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
#  define XXH_RESTRICT   /* disable */
#endif

/* XXH_likely: branch prediction hint; a no-op on compilers without __builtin_expect.
 * The XXH_unlikely counterpart is commented out. */
#if (defined(__GNUC__) && (__GNUC__ >= 3))  \
  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
  || defined(__clang__)
#    define XXH_likely(x) __builtin_expect(x, 1)
///#    define XXH_unlikely(x) __builtin_expect(x, 0)
#else
#    define XXH_likely(x) (x)
///#    define XXH_unlikely(x) (x)
#endif

#if defined(__GNUC__)
#  if defined(__AVX2__)
#    include <immintrin.h>
#  elif defined(__SSE2__)
#    include <emmintrin.h>
#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
#    define inline __inline__  /* circumvent a clang bug */
#    include <arm_neon.h>
#    undef inline
#  endif
#elif defined(_MSC_VER)
#  include <intrin.h>
#endif

#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
#   warning "XXH3 is highly inefficient without ARM or Thumb-2."
#endif

/* ==========================================
 * Vectorization detection
 * ==========================================
 * XXH_VECTOR selects the SIMD implementation. It can be preset on the command
 * line; otherwise the first matching instruction set below is chosen, with the
 * portable scalar code as the fallback.
 */
#define XXH_SCALAR 0  /* Portable scalar version */
#define XXH_SSE2   1  /* SSE2 for Pentium 4 and all x86_64 */
#define XXH_AVX2   2  /* AVX2 for Haswell and Bulldozer */
#define XXH_AVX512 3  /* AVX512 for Skylake and Icelake */
#define XXH_NEON   4  /* NEON for most ARMv7-A and all AArch64 */
#define XXH_VSX    5  /* VSX and ZVector for POWER8/z13 */

#ifndef XXH_VECTOR    /* can be defined on command line */
#  if defined(__AVX512F__)
#    define XXH_VECTOR XXH_AVX512
#  elif defined(__AVX2__)
#    define XXH_VECTOR XXH_AVX2
#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
#    define XXH_VECTOR XXH_SSE2
#  elif defined(__GNUC__) /* msvc support maybe later */ \
  && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \
  && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \
    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
#    define XXH_VECTOR XXH_NEON
/* The || group is parenthesised so that the __GNUC__ requirement applies to both
 * the POWER8 and the s390x case; without the parentheses && binds tighter than ||
 * and only the s390x case was guarded. */
#  elif ((defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
      || (defined(__s390x__) && defined(__VEC__))) \
     && defined(__GNUC__) /* TODO: IBM XL */
#    define XXH_VECTOR XXH_VSX
#  else
#    define XXH_VECTOR XXH_SCALAR
#  endif
#endif

/*
 * Controls the alignment of the accumulator,
 * for compatibility with aligned vector loads, which are usually faster.
 * Note: there is no #else branch, so XXH_ACC_ALIGN stays undefined if XXH_VECTOR
 * holds a value other than the ones listed.
 */
#ifndef XXH_ACC_ALIGN
#  if defined(XXH_X86DISPATCH)
#     define XXH_ACC_ALIGN 64  /* for compatibility with avx512 */
#  elif XXH_VECTOR == XXH_SCALAR  /* scalar */
#     define XXH_ACC_ALIGN 8
#  elif XXH_VECTOR == XXH_SSE2  /* sse2 */
#     define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_AVX2  /* avx2 */
#     define XXH_ACC_ALIGN 32
#  elif XXH_VECTOR == XXH_NEON  /* neon */
#     define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_VSX   /* vsx */
#     define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
#     define XXH_ACC_ALIGN 64
#  endif
#endif

/* Secret alignment: same as the accumulator on x86 vector builds, 8 bytes otherwise. */
#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
    || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
#else
#  define XXH_SEC_ALIGN 8
#endif

/*
 * For optimised (non -O0, non -Os) AVX2 builds with GCC proper, the current
 * optimisation options are saved and the code that follows is compiled at -O2.
 * NOTE(review): the matching '#pragma GCC pop_options' is not in this part of
 * the file; confirm it exists further down.
 */
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
#  pragma GCC push_options
#  pragma GCC optimize("-O2")
#endif


/*
 * XXH_SPLIT_IN_PLACE(in, outLo, outHi): splits a vector of 64-bit lanes into a
 * vector of the low 32-bit halves (outLo) and one of the high halves (outHi).
 * The 32-bit ARM GCC/Clang variant uses inline asm and MODIFIES 'in'; the generic
 * variant uses narrowing intrinsics and leaves 'in' unchanged.
 */
#if XXH_VECTOR == XXH_NEON
# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
   && defined(__GNUC__) \
   && !defined(__aarch64__) && !defined(__arm64__)
#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                              \
    do {                                                                                    \
      /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
      /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */     \
      /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
      __asm__("vzip.32  %e0, %f0" : "+w" (in));                                             \
      (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in));                                   \
      (outHi) = vget_high_u32(vreinterpretq_u32_u64(in));                                   \
   } while (0)
# else
#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                            \
    do {                                                                                  \
      (outLo) = vmovn_u64    (in);                                                        \
      (outHi) = vshrn_n_u64  ((in), 32);                                                  \
    } while (0)
# endif
#endif  /* XXH_VECTOR == XXH_NEON */

/*
 * VSX and Z Vector helpers.
 *
 * This is very messy, and any pull requests to clean this up are welcome.
 *
 * Supporting VSX and s390x is difficult because of inconsistent intrinsics,
 * spotty compiler coverage, and multiple endiannesses.
 */
#if XXH_VECTOR == XXH_VSX
#  if defined(__s390x__)
#    include <s390intrin.h>
#  else
/* gcc's altivec.h can unconditionally #define the bool, vector, and pixel
 * keywords, which breaks programs that already use those names for other
 * purposes.
 * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
 * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
 * but it seems that, in some cases, it isn't.
 * Force the build macro to be defined, so that keywords are not altered.
 */
#    if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
#      define __APPLE_ALTIVEC__
#    endif
#    include <altivec.h>
#  endif

/* 128-bit vector views used by the VSX kernels. */
typedef __vector unsigned long long xxh_u64x2;   /* 2 x 64-bit lanes  */
typedef __vector unsigned char xxh_u8x16;        /* 16 x 8-bit lanes  */
typedef __vector unsigned xxh_u32x4;             /* 4 x 32-bit lanes  */


/*
 * Endianness of the VSX / Z Vector target.
 *
 * XXH_vec_loadu() has to byte swap on big-endian targets so that every
 * platform works on the same little-endian lane contents. If XXH_VSX_BE is
 * left undefined, the `#if XXH_VSX_BE` test silently evaluates to 0 and the
 * swap is skipped, so make sure it is always defined. The definition is
 * guarded and therefore yields to any value the build already provides.
 */
# ifndef XXH_VSX_BE
#  if defined(__BIG_ENDIAN__) \
   || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#    define XXH_VSX_BE 1
#  else
#    define XXH_VSX_BE 0
#  endif
# endif /* !defined(XXH_VSX_BE) */

/*
 * XXH_vec_revb(): reverses the bytes inside each 64-bit lane.
 * Only needed (and only provided) on big-endian targets, and only when no
 * macro of that name already exists.
 */
# if XXH_VSX_BE && !defined(XXH_vec_revb)
#  if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
/* A native byte-reverse intrinsic is available. */
#    define XXH_vec_revb vec_revb
#  else
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
{
    /* Permute indices that mirror bytes 0..7 and bytes 8..15 separately. */
    xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
                                  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
    return vec_perm(val, val, vByteSwap);
}
#  endif
# endif /* XXH_VSX_BE && !defined(XXH_vec_revb) */

/*
 * Performs an unaligned 16-byte load and byte swaps it on big endian.
 *
 * ptr: address of at least sizeof(xxh_u64x2) readable bytes; no alignment
 *      is required because the bytes are copied with memcpy().
 * Returns the loaded vector, with each 64-bit lane byte-reversed when
 * XXH_VSX_BE is non-zero.
 */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
{
    xxh_u64x2 ret;
    memcpy(&ret, ptr, sizeof(xxh_u64x2));
# if XXH_VSX_BE
    ret = XXH_vec_revb(ret);
# endif
    return ret;
}

/*
 * vec_mulo and vec_mule are very problematic intrinsics on PowerPC.
 *
 * These intrinsics weren't added until GCC 8, despite existing for a while,
 * and they are endian dependent. Also, their meaning swaps depending on the
 * compiler version.
 *
 * XXH_vec_mulo / XXH_vec_mule therefore resolve to one of three things:
 * the plain intrinsics on s390x, Clang's non-swapping builtins when
 * available, or small inline-assembly wrappers otherwise.
 */
# if defined(__s390x__)
 /* s390x is always big endian, no issue on this platform */
#  define XXH_vec_mulo vec_mulo
#  define XXH_vec_mule vec_mule
# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
#  define XXH_vec_mulo __builtin_altivec_vmulouw
#  define XXH_vec_mule __builtin_altivec_vmuleuw
# else
/* gcc needs inline assembly */
/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
/* Emits the `vmulouw` instruction directly on (a, b) and returns its result. */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
{
    xxh_u64x2 result;
    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
    return result;
}
/* Emits the `vmuleuw` instruction directly on (a, b) and returns its result. */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
{
    xxh_u64x2 result;
    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
    return result;
}
# endif /* XXH_vec_mulo, XXH_vec_mule */
#endif /* XXH_VECTOR == XXH_VSX */


/*
 * XXH_PREFETCH(ptr): read-prefetch hint for the cache line at `ptr`.
 *
 * - Define XXH_NO_PREFETCH to turn the hint into a no-op.
 * - MSVC on x86/x64 uses _mm_prefetch() (declared by <xmmintrin.h>).
 *   32-bit MSVC identifies itself with _M_IX86; the historical 16-bit
 *   macro _M_I86 is still accepted so existing builds are unaffected.
 * - GCC-compatible compilers (>= 3.1) use __builtin_prefetch().
 * - Anything else falls back to a no-op that merely evaluates `ptr`.
 */
#if defined(XXH_NO_PREFETCH)
#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
#else
#  if defined(_MSC_VER) \
   && (defined(_M_X64) || defined(_M_IX86) || defined(_M_I86))  /* _mm_prefetch() is not defined outside of x86/x64 */
#    include <xmmintrin.h>   /* _mm_prefetch, _MM_HINT_T0 */
#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#  else
#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled */
#  endif
#endif  /* XXH_NO_PREFETCH */


/* ==========================================
 * XXH3 default settings
 * ========================================== */

/* Size in bytes of the built-in secret below; must be at least XXH3_SECRET_SIZE_MIN. */
#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */

/* Compile-time guard: refuse to build with a default secret that is too short. */
#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
#  error "default keyset is not large enough"
#endif

/*
 * Built-in 192-byte secret (12 rows of 16 bytes), 64-byte aligned.
 * Pseudorandom secret taken directly from FARSH.
 * The x86 custom-secret initializers read this table with aligned vector
 * loads, so the alignment attribute must be kept.
 */
XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
};



/*
 * XXH_mult32to64(x, y): multiplies the low 32 bits of x and y and yields the
 * full 64-bit unsigned product. Both arguments are truncated to 32 bits
 * before the multiplication in either variant.
 */
#if defined(_MSC_VER) && defined(_M_IX86)
#    include <intrin.h>
     /* 32-bit MSVC: use the __emulu intrinsic for the widening multiply. */
#    define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
#else
/*
 * Downcast + upcast is usually better than masking on older compilers like
 * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
 *
 * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
 * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
 */
#    define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
#endif

/*
 * Full-width unsigned multiply: 64 x 64 -> 128 bits.
 *
 * lhs, rhs: the two 64-bit factors.
 * Returns the product split into its low and high 64-bit halves.
 *
 * Three implementations are selected at compile time: native 128-bit
 * integers, the x64 _umul128 intrinsic, or a portable schoolbook version
 * built from four 32x32 -> 64 partial products.
 */
static XXH128_hash_t
XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
{
    XXH128_hash_t wide;

#if (defined(__GNUC__) && !defined(__wasm__) \
     && defined(__SIZEOF_INT128__)) \
    || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)

    /* The compiler offers a 128-bit integer type: let it do the work. */
    __uint128_t const full = (__uint128_t)lhs * (__uint128_t)rhs;
    wide.low64  = (xxh_u64)(full);
    wide.high64 = (xxh_u64)(full >> 64);

#elif defined(_M_X64) || defined(_M_IA64)

    /*
     * x64 intrinsic path:
     *   xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
     * It returns the low half and stores the high half through the pointer.
     */
#ifndef _MSC_VER
#   pragma intrinsic(_umul128)
#endif
    xxh_u64 top_half;
    wide.low64  = _umul128(lhs, rhs, &top_half);
    wide.high64 = top_half;

#else

    /* Portable path: split both factors into 32-bit halves. */
    xxh_u64 const a_lo = lhs & 0xFFFFFFFF;
    xxh_u64 const a_hi = lhs >> 32;
    xxh_u64 const b_lo = rhs & 0xFFFFFFFF;
    xxh_u64 const b_hi = rhs >> 32;

    /* The four partial products. */
    xxh_u64 const p_ll = XXH_mult32to64(a_lo, b_lo);
    xxh_u64 const p_hl = XXH_mult32to64(a_hi, b_lo);
    xxh_u64 const p_lh = XXH_mult32to64(a_lo, b_hi);
    xxh_u64 const p_hh = XXH_mult32to64(a_hi, b_hi);

    /* Sum the middle column first; none of these additions can overflow. */
    xxh_u64 const middle = (p_ll >> 32) + (p_hl & 0xFFFFFFFF) + p_lh;

    wide.low64  = (middle << 32) | (p_ll & 0xFFFFFFFF);
    wide.high64 = (p_hl >> 32) + (middle >> 32) + p_hh;

#endif

    return wide;
}

/*
 * Multiplies lhs by rhs into a 128-bit product and folds it back to 64 bits
 * by XOR-ing the high half with the low half.
 */
static xxh_u64
XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
{
    XXH128_hash_t const wide = XXH_mult64to128(lhs, rhs);
    xxh_u64 const folded = wide.low64 ^ wide.high64;
    return folded;
}

/*
 * Returns v64 XOR-ed with itself shifted right by `shift` bits.
 * `shift` must lie in [0, 63] (checked by XXH_ASSERT).
 * Kept as a helper because it seems to produce slightly better code on GCC.
 */
XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
{
    xxh_u64 shifted;
    XXH_ASSERT(shift >= 0 && shift < 64);
    shifted = v64 >> shift;
    return shifted ^ v64;
}

/*
 * Fast final mixing stage, suitable when the input bits are already
 * partially mixed: xorshift by 37, multiply by a 64-bit constant,
 * xorshift by 32.
 */
static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
{
    xxh_u64 const multiplied = XXH_xorshift64(h64, 37) * 0x165667919E3779F9ULL;
    return XXH_xorshift64(multiplied, 32);
}


/*
 * Mixes 16 input bytes with 16 secret bytes and a seed.
 *
 * input:  at least 16 readable bytes, read as two little-endian 64-bit words.
 * secret: at least 16 readable bytes, read the same way.
 * seed64: added to the first secret word and subtracted from the second.
 * Returns the 128-bit product of the two keyed words folded to 64 bits.
 */
XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
                                     const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
{
#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
  && defined(__i386__) && defined(__SSE2__)  /* x86 + SSE2 */ \
  && !defined(XXH_ENABLE_AUTOVECTORIZE)      /* Define to disable like XXH32 hack */
    /* Empty asm marks seed64 as modified so GCC cannot reason across it. */
    __asm__ ("" : "+r" (seed64));
#endif
    {   xxh_u64 const keyed_lo = XXH_readLE64(input)   ^ (XXH_readLE64(secret)   + seed64);
        xxh_u64 const keyed_hi = XXH_readLE64(input+8) ^ (XXH_readLE64(secret+8) - seed64);
        return XXH3_mul128_fold64(keyed_lo, keyed_hi);
    }
}


/* Constants for mid-size inputs. NOTE(review): their users are outside this section. */
#define XXH3_MIDSIZE_MAX 240
#define XXH3_MIDSIZE_STARTOFFSET 3
#define XXH3_MIDSIZE_LASTOFFSET  17


/* =======     Long Keys     ======= */

#define XXH_STRIPE_LEN 64           /* bytes of input consumed by one accumulate_512 call */
#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))   /* number of xxh_u64 accumulators per stripe */


/*
 * Stores v64 at dst in little-endian byte order.
 * dst needs no particular alignment: the bytes are copied with memcpy().
 */
XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
{
    /* Swap only when the host is not already little endian. */
    xxh_u64 const wire = XXH_CPU_LITTLE_ENDIAN ? v64 : XXH_swap64(v64);
    memcpy(dst, &wire, sizeof(wire));
}

/* Several intrinsic functions below are supposed to accept __int64 as argument,
 * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
 * However, several environments do not define the __int64 type,
 * requiring a workaround.
 *
 * xxh_i64 is the signed 64-bit type passed to those intrinsics: int64_t when
 * compiling as C++ or C99 and later (except on VMS), `long long` otherwise.
 */
#if !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
    typedef int64_t xxh_i64;
#else
    /* the following type must have a width of 64-bit */
    typedef long long xxh_i64;
#endif


/* AVX512 kernels: built when AVX512 is the selected vector path or when x86 runtime dispatch is enabled. */
#if (XXH_VECTOR == XXH_AVX512) || defined(XXH_X86DISPATCH)

/* If the build did not supply a target attribute, expand XXH_TARGET_AVX512 to nothing. */
#ifndef XXH_TARGET_AVX512
# define XXH_TARGET_AVX512  /* disable attribute target */
#endif

/*
 * AVX512 accumulation step: folds one 64-byte stripe into the accumulators.
 *
 * acc:    64-byte aligned accumulator block (asserted), updated in place.
 * input:  64 bytes of data, any alignment.
 * secret: 64 bytes of secret, any alignment.
 */
XXH_FORCE_INLINE XXH_TARGET_AVX512 void
XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT input,
                     const void* XXH_RESTRICT secret)
{
    XXH_ALIGN(64) __m512i* const lanes = (__m512i *) acc;
    XXH_ASSERT((((size_t)acc) & 63) == 0);
    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));

    {
        /* Unaligned loads of the whole stripe and of the matching secret bytes. */
        __m512i const stripe   = _mm512_loadu_si512(input);
        __m512i const key      = _mm512_loadu_si512(secret);
        /* keyed = stripe ^ key */
        __m512i const keyed    = _mm512_xor_si512(stripe, key);
        /* Move the upper 32 bits of every 64-bit lane into the lower half. */
        __m512i const keyed_hi = _mm512_shuffle_epi32(keyed, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
        /* 32x32 -> 64 product of the low and high halves of each lane. */
        __m512i const product  = _mm512_mul_epu32(keyed, keyed_hi);
        /* Raw input with the two 64-bit lanes of each 128-bit pair exchanged. */
        __m512i const swapped  = _mm512_shuffle_epi32(stripe, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
        /* acc = product + (acc + swapped) */
        *lanes = _mm512_add_epi64(product, _mm512_add_epi64(*lanes, swapped));
    }
}


/*
 * AVX512 scramble step: for every 64-bit accumulator lane computes
 *   acc = ((acc ^ (acc >> 47)) ^ secret) * XXH_PRIME32_1
 *
 * acc:    64-byte aligned accumulator block (asserted), updated in place.
 * secret: 64 bytes of secret, any alignment.
 */
XXH_FORCE_INLINE XXH_TARGET_AVX512 void
XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 63) == 0);
    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
    {   XXH_ALIGN(64) __m512i* const lanes = (__m512i*) acc;
        const __m512i prime = _mm512_set1_epi32((int)XXH_PRIME32_1);

        /* folded = acc ^ (acc >> 47) */
        __m512i const current  = *lanes;
        __m512i const folded   = _mm512_xor_si512(current, _mm512_srli_epi64(current, 47));
        /* keyed = folded ^ secret */
        __m512i const key      = _mm512_loadu_si512(secret);
        __m512i const keyed    = _mm512_xor_si512(folded, key);

        /* 64-bit multiply by the 32-bit prime, built from two 32x32 products. */
        __m512i const keyed_hi = _mm512_shuffle_epi32(keyed, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
        __m512i const low_part = _mm512_mul_epu32(keyed, prime);
        __m512i const top_part = _mm512_mul_epu32(keyed_hi, prime);
        *lanes = _mm512_add_epi64(low_part, _mm512_slli_epi64(top_part, 32));
    }
}

/*
 * AVX512 variant: derives a seeded secret from the built-in XXH3_kSecret.
 *
 * customSecret: 64-byte aligned output buffer (asserted) of
 *               XXH_SECRET_DEFAULT_SIZE bytes.
 * seed64:       seed; it is added to the even 64-bit words of the default
 *               secret and subtracted from the odd ones.
 *
 * The negated seed is computed in unsigned arithmetic: negating
 * (xxh_i64)seed64 directly would overflow (undefined behavior) when seed64
 * is 0x8000000000000000.
 */
XXH_FORCE_INLINE XXH_TARGET_AVX512 void
XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
    XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
    XXH_ASSERT(((size_t)customSecret & 63) == 0);
    /* Reference XXH_writeLE64 so it is not reported as unused in this configuration. */
    (void)(&XXH_writeLE64);
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
        /* Two's-complement negation without signed overflow. */
        xxh_i64 const seed_neg = (xxh_i64)(0U - seed64);
        /* Mask 0xAA selects the odd 64-bit lanes, which receive -seed; even lanes keep +seed. */
        __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, seed_neg);

        XXH_ALIGN(64) const __m512i* const src  = (const __m512i*) XXH3_kSecret;
        XXH_ALIGN(64)       __m512i* const dest = (      __m512i*) customSecret;
        int i;
        for (i=0; i < nbRounds; ++i) {
            /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*',
             * this will warn "discards 'const' qualifier".
             * The union strips the qualifier without a cast. */
            union {
                XXH_ALIGN(64) const __m512i* cp;
                XXH_ALIGN(64) void* p;
            } remote_const_void;
            remote_const_void.cp = src + i;
            dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
    }   }
}

#endif

/* AVX2 kernels: built when AVX2 is the selected vector path or when x86 runtime dispatch is enabled. */
#if (XXH_VECTOR == XXH_AVX2) || defined(XXH_X86DISPATCH)

/* If the build did not supply a target attribute, expand XXH_TARGET_AVX2 to nothing. */
#ifndef XXH_TARGET_AVX2
# define XXH_TARGET_AVX2  /* disable attribute target */
#endif

/*
 * AVX2 accumulation step: folds one 64-byte stripe into the accumulators,
 * 32 bytes per iteration.
 *
 * acc:    32-byte aligned accumulator block (asserted), updated in place.
 * input:  64 bytes of data, any alignment.
 * secret: 64 bytes of secret, any alignment.
 */
XXH_FORCE_INLINE XXH_TARGET_AVX2 void
XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 31) == 0);
    {   XXH_ALIGN(32) __m256i* const lanes = (__m256i *) acc;
        /* Typed views used only for pointer arithmetic; both are read with
         * unaligned loads, which take a const __m256i * argument. */
        const __m256i* const stripe = (const __m256i *) input;
        const __m256i* const keys   = (const __m256i *) secret;
        size_t const nbVectors = XXH_STRIPE_LEN/sizeof(__m256i);

        size_t v;
        for (v = 0; v < nbVectors; v++) {
            __m256i const data     = _mm256_loadu_si256(stripe + v);
            __m256i const key      = _mm256_loadu_si256(keys + v);
            /* keyed = data ^ key */
            __m256i const keyed    = _mm256_xor_si256(data, key);
            /* Upper 32 bits of every 64-bit lane moved into the lower half. */
            __m256i const keyed_hi = _mm256_shuffle_epi32(keyed, _MM_SHUFFLE(0, 3, 0, 1));
            /* 32x32 -> 64 product of the low and high halves of each lane. */
            __m256i const product  = _mm256_mul_epu32(keyed, keyed_hi);
            /* Raw input with adjacent 64-bit lanes exchanged. */
            __m256i const swapped  = _mm256_shuffle_epi32(data, _MM_SHUFFLE(1, 0, 3, 2));
            /* acc = product + (acc + swapped) */
            lanes[v] = _mm256_add_epi64(product, _mm256_add_epi64(lanes[v], swapped));
        }
    }
}

/*
 * AVX2 scramble step: for every 64-bit accumulator lane computes
 *   acc = ((acc ^ (acc >> 47)) ^ secret) * XXH_PRIME32_1
 *
 * acc:    32-byte aligned accumulator block (asserted), updated in place.
 * secret: 64 bytes of secret, any alignment.
 */
XXH_FORCE_INLINE XXH_TARGET_AVX2 void
XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 31) == 0);
    {   XXH_ALIGN(32) __m256i* const lanes = (__m256i*) acc;
        /* Typed view used only for pointer arithmetic; read with unaligned loads. */
        const __m256i* const keys = (const __m256i *) secret;
        const __m256i prime = _mm256_set1_epi32((int)XXH_PRIME32_1);
        size_t const nbVectors = XXH_STRIPE_LEN/sizeof(__m256i);

        size_t v;
        for (v = 0; v < nbVectors; v++) {
            /* folded = acc ^ (acc >> 47) */
            __m256i const current  = lanes[v];
            __m256i const folded   = _mm256_xor_si256(current, _mm256_srli_epi64(current, 47));
            /* keyed = folded ^ secret */
            __m256i const key      = _mm256_loadu_si256(keys + v);
            __m256i const keyed    = _mm256_xor_si256(folded, key);

            /* 64-bit multiply by the 32-bit prime, built from two 32x32 products. */
            __m256i const keyed_hi = _mm256_shuffle_epi32(keyed, _MM_SHUFFLE(0, 3, 0, 1));
            __m256i const low_part = _mm256_mul_epu32(keyed, prime);
            __m256i const top_part = _mm256_mul_epu32(keyed_hi, prime);
            lanes[v] = _mm256_add_epi64(low_part, _mm256_slli_epi64(top_part, 32));
        }
    }
}

/*
 * AVX2 variant: derives a seeded secret from the built-in XXH3_kSecret.
 *
 * customSecret: output buffer of XXH_SECRET_DEFAULT_SIZE bytes, written as
 *               six __m256i values.
 * seed64:       seed; it is added to the even 64-bit words of the default
 *               secret and subtracted from the odd ones.
 *
 * The negated seed is computed in unsigned arithmetic: negating
 * (xxh_i64)seed64 directly would overflow (undefined behavior) when seed64
 * is 0x8000000000000000.
 */
XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
    XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
    /* Reference XXH_writeLE64 so it is not reported as unused in this configuration. */
    (void)(&XXH_writeLE64);
    XXH_PREFETCH(customSecret);
    {   /* Two's-complement negation without signed overflow. */
        xxh_i64 const seed_pos = (xxh_i64)seed64;
        xxh_i64 const seed_neg = (xxh_i64)(0U - seed64);
        /* _mm256_set_epi64x lists lanes from highest to lowest: odd lanes get -seed. */
        __m256i const seed = _mm256_set_epi64x(seed_neg, seed_pos, seed_neg, seed_pos);

        XXH_ALIGN(64) const __m256i* const src  = (const __m256i*) XXH3_kSecret;
        XXH_ALIGN(64)       __m256i*       dest = (      __m256i*) customSecret;

#       if defined(__GNUC__) || defined(__clang__)
        /*
         * On GCC & Clang, marking 'dest' as modified will cause the compiler:
         *   - do not extract the secret from sse registers in the internal loop
         *   - use less common registers, and avoid pushing these reg into stack
         */
        __asm__("" : "+r" (dest));
#       endif

        /* GCC -O2 needs the loop unrolled manually (6 rounds, see the static assert above). */
        dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
        dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
        dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
        dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
        dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
        dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
    }
}

#endif

/* SSE2 kernels: built when SSE2 is the selected vector path or when x86 runtime dispatch is enabled. */
#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)

/* If the build did not supply a target attribute, expand XXH_TARGET_SSE2 to nothing. */
#ifndef XXH_TARGET_SSE2
# define XXH_TARGET_SSE2  /* disable attribute target */
#endif

/*
 * SSE2 accumulation step: folds one 64-byte stripe into the accumulators,
 * 16 bytes per iteration (a half-scale version of the AVX2 kernel).
 *
 * acc:    16-byte aligned accumulator block (asserted), updated in place.
 * input:  64 bytes of data, any alignment.
 * secret: 64 bytes of secret, any alignment.
 */
XXH_FORCE_INLINE XXH_TARGET_SSE2 void
XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);
    {   XXH_ALIGN(16) __m128i* const lanes = (__m128i *) acc;
        /* Typed views used only for pointer arithmetic; both are read with
         * unaligned loads, which take a const __m128i * argument. */
        const __m128i* const stripe = (const __m128i *) input;
        const __m128i* const keys   = (const __m128i *) secret;
        size_t const nbVectors = XXH_STRIPE_LEN/sizeof(__m128i);

        size_t v;
        for (v = 0; v < nbVectors; v++) {
            __m128i const data     = _mm_loadu_si128(stripe + v);
            __m128i const key      = _mm_loadu_si128(keys + v);
            /* keyed = data ^ key */
            __m128i const keyed    = _mm_xor_si128(data, key);
            /* Upper 32 bits of every 64-bit lane moved into the lower half. */
            __m128i const keyed_hi = _mm_shuffle_epi32(keyed, _MM_SHUFFLE(0, 3, 0, 1));
            /* 32x32 -> 64 product of the low and high halves of each lane. */
            __m128i const product  = _mm_mul_epu32(keyed, keyed_hi);
            /* Raw input with the two 64-bit lanes exchanged. */
            __m128i const swapped  = _mm_shuffle_epi32(data, _MM_SHUFFLE(1,0,3,2));
            /* acc = product + (acc + swapped) */
            lanes[v] = _mm_add_epi64(product, _mm_add_epi64(lanes[v], swapped));
        }
    }
}

/*
 * SSE2 scramble step: for every 64-bit accumulator lane computes
 *   acc = ((acc ^ (acc >> 47)) ^ secret) * XXH_PRIME32_1
 *
 * acc:    16-byte aligned accumulator block (asserted), updated in place.
 * secret: 64 bytes of secret, any alignment.
 */
XXH_FORCE_INLINE XXH_TARGET_SSE2 void
XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);
    {   XXH_ALIGN(16) __m128i* const lanes = (__m128i*) acc;
        /* Typed view used only for pointer arithmetic; read with unaligned loads. */
        const __m128i* const keys = (const __m128i *) secret;
        const __m128i prime = _mm_set1_epi32((int)XXH_PRIME32_1);
        size_t const nbVectors = XXH_STRIPE_LEN/sizeof(__m128i);

        size_t v;
        for (v = 0; v < nbVectors; v++) {
            /* folded = acc ^ (acc >> 47) */
            __m128i const current  = lanes[v];
            __m128i const folded   = _mm_xor_si128(current, _mm_srli_epi64(current, 47));
            /* keyed = folded ^ secret */
            __m128i const key      = _mm_loadu_si128(keys + v);
            __m128i const keyed    = _mm_xor_si128(folded, key);

            /* 64-bit multiply by the 32-bit prime, built from two 32x32 products. */
            __m128i const keyed_hi = _mm_shuffle_epi32(keyed, _MM_SHUFFLE(0, 3, 0, 1));
            __m128i const low_part = _mm_mul_epu32(keyed, prime);
            __m128i const top_part = _mm_mul_epu32(keyed_hi, prime);
            lanes[v] = _mm_add_epi64(low_part, _mm_slli_epi64(top_part, 32));
        }
    }
}

/*
 * SSE2 variant: derives a seeded secret from the built-in XXH3_kSecret.
 *
 * customSecret: output buffer of XXH_SECRET_DEFAULT_SIZE bytes, written as
 *               __m128i values.
 * seed64:       seed; it is added to the even 64-bit words of the default
 *               secret and subtracted from the odd ones.
 *
 * The negated seed is computed in unsigned arithmetic: negating
 * (xxh_i64)seed64 directly would overflow (undefined behavior) when seed64
 * is 0x8000000000000000.
 */
XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
    /* Reference XXH_writeLE64 so it is not reported as unused in this configuration. */
    (void)(&XXH_writeLE64);
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
        /* Two's-complement negation without signed overflow. */
        xxh_i64 const seed_pos = (xxh_i64)seed64;
        xxh_i64 const seed_neg = (xxh_i64)(0U - seed64);

#       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
        /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
        XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { seed_pos, seed_neg };
        __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
#       else
        /* _mm_set_epi64x takes the high lane first: low lane +seed, high lane -seed. */
        __m128i const seed = _mm_set_epi64x(seed_neg, seed_pos);
#       endif
        int i;

        /* The default secret is read through a float pointer so that
         * _mm_load_ps (an aligned 16-byte load) can be used below. */
        XXH_ALIGN(64)        const float* const src  = (float const*) XXH3_kSecret;
        XXH_ALIGN(XXH_SEC_ALIGN) __m128i*       dest = (__m128i*) customSecret;
#       if defined(__GNUC__) || defined(__clang__)
        /*
         * On GCC & Clang, marking 'dest' as modified will cause the compiler:
         *   - do not extract the secret from sse registers in the internal loop
         *   - use less common registers, and avoid pushing these reg into stack
         */
        __asm__("" : "+r" (dest));
#       endif

        for (i=0; i < nbRounds; ++i) {
            /* src advances 4 floats (16 bytes) per round. */
            dest[i] = _mm_add_epi64(_mm_castps_si128(_mm_load_ps(src+i*4)), seed);
    }   }
}

#endif

#if (XXH_VECTOR == XXH_NEON)

/*
 * NEON accumulation step: folds one 64-byte stripe into the accumulators,
 * 16 bytes per iteration.
 *
 * acc:    16-byte aligned accumulator block (asserted), updated in place.
 * input:  64 bytes of data, read byte-wise so no alignment is required.
 * secret: 64 bytes of secret, read the same way.
 */
XXH_FORCE_INLINE void
XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);
    {
        XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc;
        /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
        uint8_t const* const xinput = (const uint8_t *) input;
        uint8_t const* const xsecret  = (const uint8_t *) secret;

        size_t i;
        for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) {
            /* data_vec = xinput[i]; */
            uint8x16_t data_vec    = vld1q_u8(xinput  + (i * 16));
            /* key_vec  = xsecret[i];  */
            uint8x16_t key_vec     = vld1q_u8(xsecret + (i * 16));
            uint64x2_t data_key;
            uint32x2_t data_key_lo, data_key_hi;
            /* xacc[i] += swap(data_vec); -- the two 64-bit lanes are exchanged */
            uint64x2_t const data64  = vreinterpretq_u64_u8(data_vec);
            uint64x2_t const swapped = vextq_u64(data64, data64, 1);
            xacc[i] = vaddq_u64 (xacc[i], swapped);
            /* data_key = data_vec ^ key_vec; */
            data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
            /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
             * data_key_hi = (uint32x2_t) (data_key >> 32);
             * data_key = UNDEFINED; (the macro may clobber it, do not reuse) */
            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
            /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
            xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);

        }
    }
}

/*
 * NEON scramble step: for every 64-bit accumulator lane computes
 *   acc = ((acc ^ (acc >> 47)) ^ secret) * XXH_PRIME32_1
 *
 * acc:    16-byte aligned accumulator block (asserted), updated in place.
 * secret: 64 bytes of secret, read byte-wise so no alignment is required.
 */
XXH_FORCE_INLINE void
XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);

    {   uint64x2_t* xacc       = (uint64x2_t*) acc;
        uint8_t const* xsecret = (uint8_t const*) secret;
        uint32x2_t prime       = vdup_n_u32 (XXH_PRIME32_1);

        size_t i;
        for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) {
            /* xacc[i] ^= (xacc[i] >> 47); */
            uint64x2_t acc_vec  = xacc[i];
            uint64x2_t shifted  = vshrq_n_u64 (acc_vec, 47);
            uint64x2_t data_vec = veorq_u64   (acc_vec, shifted);

            /* xacc[i] ^= xsecret[i]; */
            uint8x16_t key_vec  = vld1q_u8(xsecret + (i * 16));
            uint64x2_t data_key = veorq_u64(data_vec, vreinterpretq_u64_u8(key_vec));

            /* xacc[i] *= XXH_PRIME32_1 */
            uint32x2_t data_key_lo, data_key_hi;
            /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
             * data_key_hi = (uint32x2_t) (data_key >> 32);
             * data_key = UNDEFINED; (the macro may clobber it, do not reuse) */
            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
            {   /*
                 * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
                 *
                 * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
                 * incorrectly "optimize" this:
                 *   tmp     = vmul_u32(vmovn_u64(a), vmovn_u64(b));
                 *   shifted = vshll_n_u32(tmp, 32);
                 * to this:
                 *   tmp     = "vmulq_u64"(a, b); // no such thing!
                 *   shifted = vshlq_n_u64(tmp, 32);
                 *
                 * However, unlike SSE, Clang lacks a 64-bit multiply routine
                 * for NEON, and it scalarizes two 64-bit multiplies instead.
                 *
                 * vmull_u32 has the same timing as vmul_u32, and it avoids
                 * this bug completely.
                 * See https://bugs.llvm.org/show_bug.cgi?id=39967
                 */
                uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
                /* xacc[i] = prod_hi << 32; */
                xacc[i] = vshlq_n_u64(prod_hi, 32);
                /* xacc[i] += (data_key & 0xFFFFFFFF) * XXH_PRIME32_1; */
                xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
            }
    }   }
}

#endif

#if (XXH_VECTOR == XXH_VSX)

/*
 * VSX / Z Vector accumulation step: folds one 64-byte stripe into the
 * accumulators, 16 bytes per iteration.
 *
 * acc:    accumulator block, presumed 16-byte aligned (not asserted here),
 *         updated in place.
 * input:  64 bytes of data, any alignment (loaded through XXH_vec_loadu).
 * secret: 64 bytes of secret, any alignment.
 */
XXH_FORCE_INLINE void
XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
          xxh_u64x2* const xacc     =       (xxh_u64x2*) acc;    /* presumed aligned */
    xxh_u64x2 const* const xinput   = (xxh_u64x2 const*) input;   /* no alignment restriction */
    xxh_u64x2 const* const xsecret  = (xxh_u64x2 const*) secret;    /* no alignment restriction */
    xxh_u64x2 const v32 = { 32, 32 };   /* per-lane rotate count */
    size_t i;
    for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
        /* data_vec = xinput[i]; */
        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
        /* key_vec = xsecret[i]; */
        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
        xxh_u64x2 const data_key = data_vec ^ key_vec;
        /* shuffled = (data_key << 32) | (data_key >> 32); */
        xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
        /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
        xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
        xacc[i] += product;

        /* swap high and low halves */
#ifdef __s390x__
        xacc[i] += vec_permi(data_vec, data_vec, 2);
#else
        xacc[i] += vec_xxpermdi(data_vec, data_vec, 2);
#endif
    }
}

/*
 * VSX / Z Vector scramble step: for every 64-bit accumulator lane computes
 *   acc = ((acc ^ (acc >> 47)) ^ secret) * XXH_PRIME32_1
 *
 * acc:    16-byte aligned accumulator block (asserted), updated in place.
 * secret: 64 bytes of secret, any alignment (loaded through XXH_vec_loadu).
 */
XXH_FORCE_INLINE void
XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);

    {         xxh_u64x2* const xacc    =       (xxh_u64x2*) acc;
        const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;
        /* constants */
        xxh_u64x2 const v32  = { 32, 32 };
        xxh_u64x2 const v47 = { 47, 47 };
        xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
        size_t i;
        for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
            /* xacc[i] ^= (xacc[i] >> 47); */
            xxh_u64x2 const acc_vec  = xacc[i];
            xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);

            /* xacc[i] ^= xsecret[i]; */
            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
            xxh_u64x2 const data_key = data_vec ^ key_vec;

            /* xacc[i] *= XXH_PRIME32_1 */
            /* prod_even: product that is shifted left by 32 below, i.e. the
             * high-half contribution of the 64-bit multiply */
            xxh_u64x2 const prod_even  = XXH_vec_mule((xxh_u32x4)data_key, prime);
            /* prod_odd: product that is added unshifted, i.e. the low-half
             * contribution of the 64-bit multiply */
            xxh_u64x2 const prod_odd  = XXH_vec_mulo((xxh_u32x4)data_key, prime);
            xacc[i] = prod_odd + (prod_even << v32);
    }   }
}

#endif

/* scalar variants - universal */

/*
 * Portable accumulation step: folds one 64-byte stripe into the
 * XXH_ACC_NB 64-bit accumulators.
 *
 * acc:    accumulator block aligned on XXH_ACC_ALIGN (asserted), updated in place.
 * input:  64 bytes of data, any alignment.
 * secret: 64 bytes of secret, any alignment.
 */
XXH_FORCE_INLINE void
XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT input,
                     const void* XXH_RESTRICT secret)
{
    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const lanes = (xxh_u64*) acc;
    const xxh_u8* const stripe = (const xxh_u8*) input;
    const xxh_u8* const key    = (const xxh_u8*) secret;
    size_t lane;
    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
    for (lane = 0; lane < XXH_ACC_NB; lane++) {
        size_t const offset = 8*lane;
        xxh_u64 const word  = XXH_readLE64(stripe + offset);
        xxh_u64 const keyed = word ^ XXH_readLE64(key + offset);
        /* The raw input word goes to the neighbouring lane (index with bit 0 flipped). */
        lanes[lane ^ 1] += word;
        /* This lane receives low32(keyed) * high32(keyed). */
        lanes[lane] += XXH_mult32to64(keyed & 0xFFFFFFFF, keyed >> 32);
    }
}

/*
 * Portable scramble step: for every 64-bit accumulator computes
 *   acc = ((acc ^ (acc >> 47)) ^ secret) * XXH_PRIME32_1
 *
 * acc:    accumulator block aligned on XXH_ACC_ALIGN (asserted), updated in place.
 * secret: 64 bytes of secret, any alignment.
 */
XXH_FORCE_INLINE void
XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const lanes = (xxh_u64*) acc;
    const xxh_u8* const key = (const xxh_u8*) secret;
    size_t lane;
    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
    for (lane = 0; lane < XXH_ACC_NB; lane++) {
        xxh_u64 const key64 = XXH_readLE64(key + 8*lane);
        xxh_u64 const keyed = XXH_xorshift64(lanes[lane], 47) ^ key64;
        lanes[lane] = keyed * XXH_PRIME32_1;
    }
}



/* Function-pointer types for the three per-architecture kernels. */
/* (acc, input, secret): accumulate one stripe into acc. */
typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);
/* (acc, secret): scramble the accumulators. */
typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
/* (customSecret, seed64): derive a seeded secret. */
typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);


/*
 * Compile-time dispatch: map the generic names XXH3_accumulate_512 and
 * XXH3_scrambleAcc onto the implementation matching XXH_VECTOR.
 * Note that XXH3_initCustomSecret is only mapped for AVX512, AVX2 and SSE2;
 * the NEON, VSX and scalar branches leave it undefined.
 */
#if (XXH_VECTOR == XXH_AVX512)

#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512

#elif (XXH_VECTOR == XXH_AVX2)

#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2

#elif (XXH_VECTOR == XXH_SSE2)

#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
#define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2

#elif (XXH_VECTOR == XXH_NEON)

#define XXH3_accumulate_512 XXH3_accumulate_512_neon
#define XXH3_scrambleAcc    XXH3_scrambleAcc_neon

#elif (XXH_VECTOR == XXH_VSX)

#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
#define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx

#else /* scalar */

#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar

#endif



/*
 * XXH_PREFETCH_DIST: byte distance ahead of the current stripe that
 * XXH3_accumulate() hands to XXH_PREFETCH. A value defined before this point
 * is kept; otherwise the default is 320 on clang, 512 for AVX512 builds on
 * other compilers, and 384 everywhere else.
 */
#ifndef XXH_PREFETCH_DIST
#  ifdef __clang__
#    define XXH_PREFETCH_DIST 320
#  else
#    if (XXH_VECTOR == XXH_AVX512)
#      define XXH_PREFETCH_DIST 512
#    else
#      define XXH_PREFETCH_DIST 384
#    endif
#  endif  /* __clang__ */
#endif  /* XXH_PREFETCH_DIST */

/*
 * XXH3_accumulate()
 * Runs f_acc512() over `nbStripes` consecutive stripes of `input`.
 * Stripe k starts at input + k*XXH_STRIPE_LEN and is keyed with
 * secret + k*XXH_SECRET_CONSUME_RATE; a prefetch is issued
 * XXH_PREFETCH_DIST bytes ahead of each stripe.
 * Assumption: nbStripes will not overflow the secret size
 */
XXH_FORCE_INLINE void
XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,
                const xxh_u8* XXH_RESTRICT input,
                const xxh_u8* XXH_RESTRICT secret,
                      size_t nbStripes,
                      XXH3_f_accumulate_512 f_acc512)
{
    size_t stripe = 0;
    while (stripe < nbStripes) {
        const xxh_u8* const stripeStart = input + stripe*XXH_STRIPE_LEN;
        const xxh_u8* const stripeKey   = secret + stripe*XXH_SECRET_CONSUME_RATE;
        XXH_PREFETCH(stripeStart + XXH_PREFETCH_DIST);
        f_acc512(acc, stripeStart, stripeKey);
        stripe++;
    }
}

/*
 * One-shot core loop for long inputs: folds the whole `input` (len bytes) into `acc`.
 * The input is cut into blocks of nbStripesPerBlock stripes; each full block is
 * accumulated and then scrambled with the last XXH_STRIPE_LEN bytes of the secret.
 * The remaining stripes are accumulated without scrambling, and finally the last
 * XXH_STRIPE_LEN bytes of the input are accumulated once more with a secret
 * segment shifted by XXH_SECRET_LASTACC_START.
 * Asserted preconditions: secretSize >= XXH3_SECRET_SIZE_MIN and len > XXH_STRIPE_LEN.
 * Side note: this function also #defines XXH_SECRET_LASTACC_START, which stays
 * visible to the code that follows it in the file.
 */
XXH_FORCE_INLINE void
XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
                      const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                            XXH3_f_accumulate_512 f_acc512,
                            XXH3_f_scrambleAcc f_scramble)
{
    size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
    size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
    /* (len - 1): an input that is an exact multiple of block_len keeps its last block for the tail handling below */
    size_t const nb_blocks = (len - 1) / block_len;

    size_t n;

    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);

    /* full blocks: accumulate every stripe, then scramble */
    for (n = 0; n < nb_blocks; n++) {
        XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);
        f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
    }

    /* last partial block */
    XXH_ASSERT(len > XXH_STRIPE_LEN);
    {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
        XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
        XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);

        /* last stripe: always the final XXH_STRIPE_LEN bytes of the input */
        {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
#define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
            f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
    }   }
}

/*
 * Combines two adjacent accumulator lanes: each lane is XORed with 8
 * little-endian bytes of `secret` (offsets 0 and 8) and the pair is reduced
 * to 64 bits with XXH3_mul128_fold64().
 */
XXH_FORCE_INLINE xxh_u64
XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
{
    xxh_u64 const keyedFirst  = acc[0] ^ XXH_readLE64(secret);
    xxh_u64 const keyedSecond = acc[1] ^ XXH_readLE64(secret+8);
    return XXH3_mul128_fold64(keyedFirst, keyedSecond);
}

/*
 * Folds the 8 accumulator lanes into one 64-bit hash.
 * Starting from `start`, adds XXH3_mix2Accs() of each of the 4 lane pairs,
 * pair i being keyed with secret + 16*i, then returns XXH3_avalanche() of the sum.
 * `secret` must therefore provide at least 64 readable bytes.
 */
static XXH64_hash_t
XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
{
    xxh_u64 result64 = start;
    size_t i = 0;

    for (i = 0; i < 4; i++) {
        result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
#if defined(__clang__)                                /* Clang */ \
    && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */ \
    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */  \
    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
        /*
         * UGLY HACK:
         * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
         * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
         * XXH3_64bits, len == 256, Snapdragon 835:
         *   without hack: 2063.7 MB/s
         *   with hack:    2560.7 MB/s
         */
        __asm__("" : "+r" (result64));
#endif
    }

    return XXH3_avalanche(result64);
}

/* Initial values of the 8 accumulator lanes (brace initializer for an xxh_u64[8]). */
#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
                        XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }


/* Offset into the secret used when merging accumulators: applied from the start
 * for the low 64 bits and mirrored from the end for the high 64 bits. */
#define XXH_SECRET_MERGEACCS_START 11

/*
 * It's important for performance that XXH3_hashLong is not inlined.
 */



/* Pointer to a 64-bit long-input hashing routine. By analogy with the 128-bit
 * variants in this file the arguments are presumably (input, len, seed, secret,
 * secretSize) -- no 64-bit implementation is visible here, confirm. */
typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
                                          XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);


/* ===   Public entry point   === */







/*
 * Puts a streaming state back to its initial condition.
 *  - zeroes every member located from `bufferedSize` up to (but excluding)
 *    `nbStripesPerBlock`;
 *  - loads the accumulator lanes with the XXH3_INIT_ACC constants;
 *  - records `seed` and the external `secret` pointer (not copied);
 *  - derives secretLimit and nbStripesPerBlock from `secretSize`.
 * `statePtr` must not be NULL and `secretSize` must be >= XXH3_SECRET_SIZE_MIN
 * (both only checked through XXH_ASSERT).
 */
static void
XXH3_64bits_reset_internal(XXH3_state_t* statePtr,
                           XXH64_hash_t seed,
                           const void* secret, size_t secretSize)
{
    xxh_u64 const initialLanes[] = XXH3_INIT_ACC;
    size_t const zeroFrom = offsetof(XXH3_state_t, bufferedSize);
    size_t const zeroUpTo = offsetof(XXH3_state_t, nbStripesPerBlock);
    size_t lane;

    XXH_ASSERT(zeroUpTo > zeroFrom);
    XXH_ASSERT(statePtr != NULL);
    memset((char*)statePtr + zeroFrom, 0, zeroUpTo - zeroFrom);

    for (lane = 0; lane < sizeof(initialLanes)/sizeof(initialLanes[0]); lane++) {
        statePtr->acc[lane] = initialLanes[lane];
    }

    statePtr->seed = seed;
    statePtr->extSecret = (const unsigned char*)secret;
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
    statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
    statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
}






/*
 * Accumulates `nbStripes` stripes of `input` while keeping track of the
 * position inside the current block (*nbStripesSoFarPtr, updated on return).
 * When the block boundary is reached, the accumulators are scrambled with
 * secret + secretLimit and accumulation resumes from the start of the secret.
 * At most one scramble can happen per call (nbStripes <= nbStripesPerBlock).
 *
 * Note : when XXH3_consumeStripes() is invoked, there must be a guarantee that
 * at least one more byte will be consumed from input afterwards, so that all
 * stripes can blindly use the "normal" secret segment.
 */
XXH_FORCE_INLINE void
XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
                    size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
                    const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
                    const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
                    XXH3_f_accumulate_512 f_acc512,
                    XXH3_f_scrambleAcc f_scramble)
{
    size_t const stripesDone        = *nbStripesSoFarPtr;
    size_t const stripesLeftInBlock = nbStripesPerBlock - stripesDone;
    const xxh_u8* const currentKey  = secret + stripesDone * XXH_SECRET_CONSUME_RATE;

    XXH_ASSERT(nbStripes <= nbStripesPerBlock);  /* can handle max 1 scramble per invocation */
    XXH_ASSERT(stripesDone < nbStripesPerBlock);

    if (nbStripes < stripesLeftInBlock) {
        /* everything fits in the current block: no scrambling */
        XXH3_accumulate(acc, input, currentKey, nbStripes, f_acc512);
        *nbStripesSoFarPtr = stripesDone + nbStripes;
        return;
    }

    /* finish the block, scramble, then start the next block */
    {   size_t const stripesAfterScramble = nbStripes - stripesLeftInBlock;
        XXH3_accumulate(acc, input, currentKey, stripesLeftInBlock, f_acc512);
        f_scramble(acc, secret + secretLimit);
        XXH3_accumulate(acc, input + stripesLeftInBlock * XXH_STRIPE_LEN, secret, stripesAfterScramble, f_acc512);
        *nbStripesSoFarPtr = stripesAfterScramble;
    }
}

/*
 * Streaming ingestion routine; XXH3_128bits_update() calls it, and the original
 * note says XXH3_64bits_update() does too.
 *
 * Appends `len` bytes of `input` to the state:
 *  - returns early on a NULL input (XXH_OK or XXH_ERROR depending on
 *    XXH_ACCEPT_NULL_INPUT_POINTER);
 *  - if everything fits in the internal buffer, it is only copied there;
 *  - otherwise the buffer is completed and consumed, whole buffer-sized chunks
 *    are consumed directly from `input`, and the remainder is buffered.
 * The code always leaves at least one byte in the buffer (see the asserts), and
 * after consuming directly from `input` it saves the last consumed stripe at the
 * end of the buffer, where XXH3_digest_long() reads it back.
 * Returns XXH_OK on success.
 */
XXH_FORCE_INLINE XXH_errorcode
XXH3_update(XXH3_state_t* state,
            const xxh_u8* input, size_t len,
            XXH3_f_accumulate_512 f_acc512,
            XXH3_f_scrambleAcc f_scramble)
{
    if (input==NULL)
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
        return XXH_OK;
#else
        return XXH_ERROR;
#endif

    {   const xxh_u8* const bEnd = input + len;
        /* an external secret, when set, takes precedence over the state's own customSecret */
        const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;

        state->totalLen += len;

        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {  /* fill in tmp buffer */
            XXH_memcpy(state->buffer + state->bufferedSize, input, len);
            state->bufferedSize += (XXH32_hash_t)len;
            return XXH_OK;
        }
        /* total input is now > XXH3_INTERNALBUFFER_SIZE */

        /* number of stripes held by a full internal buffer */
        #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
        XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */

        /*
         * Internal buffer is partially filled (always, except at beginning)
         * Complete it, then consume it.
         */
        if (state->bufferedSize) {
            size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
            XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
            input += loadSize;
            XXH3_consumeStripes(state->acc,
                               &state->nbStripesSoFar, state->nbStripesPerBlock,
                                state->buffer, XXH3_INTERNALBUFFER_STRIPES,
                                secret, state->secretLimit,
                                f_acc512, f_scramble);
            state->bufferedSize = 0;
        }
        XXH_ASSERT(input < bEnd);

        /* Consume input by a multiple of internal buffer size */
        /* strict '<': a chunk is only consumed while more than a full buffer remains, so some input is always left to buffer */
        if (input+XXH3_INTERNALBUFFER_SIZE < bEnd) {
            const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
            do {
                XXH3_consumeStripes(state->acc,
                                   &state->nbStripesSoFar, state->nbStripesPerBlock,
                                    input, XXH3_INTERNALBUFFER_STRIPES,
                                    secret, state->secretLimit,
                                    f_acc512, f_scramble);
                input += XXH3_INTERNALBUFFER_SIZE;
            } while (input<limit);
            /* for last partial stripe: keep the last consumed stripe at the end of the buffer */
            memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
        }
        XXH_ASSERT(input < bEnd);

        /* Some remaining input (always) : buffer it */
        XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
        state->bufferedSize = (XXH32_hash_t)(bEnd-input);
    }

    return XXH_OK;
}



/*
 * Finalizes the accumulators of a streaming state into `acc` (long-input path).
 * Works on a copy of state->acc, so the state itself is left untouched and can
 * keep ingesting input afterwards.
 *  - With at least one full stripe buffered: the complete stripes (all but the
 *    last byte's worth) are consumed, then the last XXH_STRIPE_LEN buffered
 *    bytes are accumulated with the "last stripe" secret segment.
 *  - With less than one stripe buffered: the final stripe is rebuilt from the
 *    bytes stored at the end of state->buffer followed by the buffered bytes.
 */
XXH_FORCE_INLINE void
XXH3_digest_long (XXH64_hash_t* acc,
                  const XXH3_state_t* state,
                  const unsigned char* secret)
{
    const unsigned char* const lastStripeKey =
        secret + state->secretLimit - XXH_SECRET_LASTACC_START;

    memcpy(acc, state->acc, sizeof(state->acc));

    if (state->bufferedSize < XXH_STRIPE_LEN) {
        xxh_u8 rebuilt[XXH_STRIPE_LEN];
        size_t const borrowed = XXH_STRIPE_LEN - state->bufferedSize;
        XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
        /* tail of the previously consumed data, kept at the end of the buffer */
        memcpy(rebuilt, state->buffer + sizeof(state->buffer) - borrowed, borrowed);
        memcpy(rebuilt + borrowed, state->buffer, state->bufferedSize);
        XXH3_accumulate_512(acc, rebuilt, lastStripeKey);
        return;
    }

    {   size_t const fullStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
        size_t stripesSoFar = state->nbStripesSoFar;  /* local copy: state is const */
        XXH3_consumeStripes(acc,
                           &stripesSoFar, state->nbStripesPerBlock,
                            state->buffer, fullStripes,
                            secret, state->secretLimit,
                            XXH3_accumulate_512, XXH3_scrambleAcc);
        /* last stripe */
        XXH3_accumulate_512(acc,
                            state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
                            lastStripeKey);
    }
}


/* Smaller of x and y. Function-like macro: the selected argument is evaluated
 * twice, so do not pass expressions with side effects. */
#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))



/*
 * 128-bit hash of a 1..3 byte input.
 * The first, middle (input[len/2]) and last bytes plus the length are packed
 * into one 32-bit word; a byte-swapped, rotated copy feeds the high half.
 * Each word is XORed with a 32-bit secret-derived value adjusted by the seed
 * (added for the low half, subtracted for the high half) and passed through
 * XXH64_avalanche().
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(1 <= len && len <= 3);
    XXH_ASSERT(secret != NULL);
    /*
     * Byte layout of the packed word, most significant byte first:
     * len = 1: { input[0], input[0], 0x01, input[0] }
     * len = 2: { input[1], input[0], 0x02, input[1] }
     * len = 3: { input[1], input[0], 0x03, input[2] }
     */
    {   xxh_u32 const first  = input[0];
        xxh_u32 const middle = input[len >> 1];
        xxh_u32 const last   = input[len - 1];
        xxh_u32 const packed_lo = (first << 16) | (middle << 24)
                                | last | ((xxh_u32)len << 8);
        xxh_u32 const packed_hi = XXH_rotl32(XXH_swap32(packed_lo), 13);
        xxh_u64 const flip_lo = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
        xxh_u64 const flip_hi = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
        XXH128_hash_t result;
        result.low64  = XXH64_avalanche((xxh_u64)packed_lo ^ flip_lo);
        result.high64 = XXH64_avalanche((xxh_u64)packed_hi ^ flip_hi);
        return result;
    }
}

/*
 * 128-bit hash of a 4..8 byte input.
 * The first and last 4 bytes (which overlap when len < 8) form one 64-bit word,
 * which is XORed with a secret-derived value plus the modified seed, multiplied
 * into 128 bits by (XXH_PRIME64_1 + 4*len) and then mixed further.
 * Reads secret bytes 16..31.
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(4 <= len && len <= 8);
    /* fold a byte-swapped copy of the low 32 bits of the seed into its high 32 bits */
    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
    {   xxh_u32 const input_lo = XXH_readLE32(input);
        xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
        xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
        xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
        xxh_u64 const keyed = input_64 ^ bitflip;

        /* Shift len to the left to ensure it is even, this avoids even multiplies. */
        XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));

        /* cross-mix the two halves of the product */
        m128.high64 += (m128.low64 << 1);
        m128.low64  ^= (m128.high64 >> 3);

        /* final mixing of each half */
        m128.low64   = XXH_xorshift64(m128.low64, 35);
        m128.low64  *= 0x9FB21C651E98DF25ULL;
        m128.low64   = XXH_xorshift64(m128.low64, 28);
        m128.high64  = XXH3_avalanche(m128.high64);
        return m128;
    }
}

/*
 * 128-bit hash of a 9..16 byte input.
 * Reads the first and last 8 bytes (overlapping when len < 16), combines them
 * with seed-adjusted values derived from secret bytes 32..63, and performs a
 * 128x64 multiply by XXH_PRIME64_2 before avalanching both halves.
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(9 <= len && len <= 16);
    {   xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
        xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
        xxh_u64 const input_lo = XXH_readLE64(input);
        xxh_u64       input_hi = XXH_readLE64(input + len - 8);
        XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
        /*
         * Put len in the middle of m128 to ensure that the length gets mixed to
         * both the low and high bits in the 128x64 multiply below.
         */
        m128.low64 += (xxh_u64)(len - 1) << 54;
        input_hi   ^= bitfliph;
        /*
         * Add the high 32 bits of input_hi to the high 32 bits of m128, then
         * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
         * the high 64 bits of m128.
         *
         * The best approach to this operation is different on 32-bit and 64-bit.
         */
        /* both branches add the same value: hi32(input_hi)<<32 + lo32(input_hi)*XXH_PRIME32_2 */
        if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
            /*
             * 32-bit optimized version, which is more readable.
             *
             * On 32-bit, it removes an ADC and delays a dependency between the two
             * halves of m128.high64, but it generates an extra mask on 64-bit.
             */
            m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
        } else {
            /* input_hi already contributes lo32 once, hence the multiplier XXH_PRIME32_2 - 1 */
            m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
        }
        /* m128 ^= XXH_swap64(m128 >> 64); */
        m128.low64  ^= XXH_swap64(m128.high64);

        {   /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
            XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
            h128.high64 += m128.high64 * XXH_PRIME64_2;

            h128.low64   = XXH3_avalanche(h128.low64);
            h128.high64  = XXH3_avalanche(h128.high64);
            return h128;
    }   }
}

/*
 * 128-bit hash of an input of at most 16 bytes: dispatches on `len` to the
 * 9..16, 4..8 or 1..3 routine. An empty input is hashed from the seed and
 * secret bytes 64..95 only.
 * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(len <= 16);
    if (len > 8)
        return XXH3_len_9to16_128b(input, len, secret, seed);
    if (len >= 4)
        return XXH3_len_4to8_128b(input, len, secret, seed);
    if (len != 0)
        return XXH3_len_1to3_128b(input, len, secret, seed);

    /* len == 0 */
    {   xxh_u64 const flip_lo = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
        xxh_u64 const flip_hi = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
        XXH128_hash_t emptyHash;
        emptyHash.low64  = XXH64_avalanche(seed ^ flip_lo);
        emptyHash.high64 = XXH64_avalanche(seed ^ flip_hi);
        return emptyHash;
    }
}

/*
 * Mixes two 16-byte inputs into a 128-bit accumulator and returns the result.
 *   low64  : add XXH3_mix16B(input_1, secret),    then XOR the sum of input_2's two 64-bit words
 *   high64 : add XXH3_mix16B(input_2, secret+16), then XOR the sum of input_1's two 64-bit words
 * A bit slower than XXH3_mix16B, but handles multiply by zero better.
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
              const xxh_u8* secret, XXH64_hash_t seed)
{
    xxh_u64 const wordSum_1 = XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
    xxh_u64 const wordSum_2 = XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);

    acc.low64  = (acc.low64  + XXH3_mix16B (input_1, secret+0,  seed)) ^ wordSum_2;
    acc.high64 = (acc.high64 + XXH3_mix16B (input_2, secret+16, seed)) ^ wordSum_1;
    return acc;
}


/*
 * 128-bit hash of a 17..128 byte input.
 * Pairs a 16-byte chunk from the front with one from the back, starting with
 * the innermost pair that the length allows and finishing with the outermost
 * pair (first 16 / last 16 bytes). Each pair is mixed with XXH128_mix32B()
 * using its own 32-byte secret segment. `secretSize` is only asserted.
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                      XXH64_hash_t seed)
{
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
    XXH_ASSERT(16 < len && len <= 128);

    {   XXH128_hash_t acc;
        XXH128_hash_t result;
        acc.low64 = len * XXH_PRIME64_1;
        acc.high64 = 0;

        /* innermost pairs first; each threshold implies the following ones */
        if (len > 96)
            acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
        if (len > 64)
            acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
        if (len > 32)
            acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
        acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);

        result.low64  = acc.low64 + acc.high64;
        result.high64 = (acc.low64    * XXH_PRIME64_1)
                      + (acc.high64   * XXH_PRIME64_4)
                      + ((len - seed) * XXH_PRIME64_2);
        result.low64  = XXH3_avalanche(result.low64);
        result.high64 = (XXH64_hash_t)0 - XXH3_avalanche(result.high64);
        return result;
    }
}

/*
 * 128-bit hash of a 129..XXH3_MIDSIZE_MAX byte input.
 * The input is processed in 32-byte rounds (len/32 of them):
 *  - rounds 0..3 use the secret from its start, then both halves are avalanched;
 *  - the remaining rounds use the secret from XXH3_MIDSIZE_STARTOFFSET;
 *  - finally the last 32 input bytes are mixed (halves swapped) with the
 *    negated seed and a dedicated secret segment.
 * `secretSize` is only asserted.
 */
XXH_NO_INLINE XXH128_hash_t
XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                       XXH64_hash_t seed)
{
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);

    {   XXH128_hash_t acc;
        XXH128_hash_t result;
        int const totalRounds = (int)len / 32;
        int round = 0;

        acc.low64 = len * XXH_PRIME64_1;
        acc.high64 = 0;

        /* first four rounds */
        while (round < 4) {
            const xxh_u8* const chunk = input + (32 * round);
            acc = XXH128_mix32B(acc, chunk, chunk + 16, secret + (32 * round), seed);
            round++;
        }
        acc.low64 = XXH3_avalanche(acc.low64);
        acc.high64 = XXH3_avalanche(acc.high64);

        /* remaining rounds, round == 4 on entry */
        XXH_ASSERT(totalRounds >= 4);
        for (; round < totalRounds; round++) {
            const xxh_u8* const chunk = input + (32 * round);
            acc = XXH128_mix32B(acc,
                                chunk,
                                chunk + 16,
                                secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (round - 4)),
                                seed);
        }

        /* last bytes */
        acc = XXH128_mix32B(acc,
                            input + len - 16,
                            input + len - 32,
                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
                            0ULL - seed);

        result.low64  = acc.low64 + acc.high64;
        result.high64 = (acc.low64    * XXH_PRIME64_1)
                      + (acc.high64   * XXH_PRIME64_4)
                      + ((len - seed) * XXH_PRIME64_2);
        result.low64  = XXH3_avalanche(result.low64);
        result.high64 = (XXH64_hash_t)0 - XXH3_avalanche(result.high64);
        return result;
    }
}

/*
 * One-shot 128-bit hash of a long input.
 * Runs XXH3_hashLong_internal_loop() on freshly initialised accumulators, then
 * merges them twice: the low half is keyed from the start of the secret
 * (+XXH_SECRET_MERGEACCS_START) and seeded with len*XXH_PRIME64_1, the high
 * half is keyed from the mirrored position at the end of the secret and
 * seeded with ~(len*XXH_PRIME64_2).
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                            XXH3_f_accumulate_512 f_acc512,
                            XXH3_f_scrambleAcc f_scramble)
{
    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;

    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble);

    /* converge into final hash */
    XXH_STATIC_ASSERT(sizeof(acc) == 64);
    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
    {   const xxh_u8* const lowKey  = secret + XXH_SECRET_MERGEACCS_START;
        const xxh_u8* const highKey = secret + secretSize
                                             - sizeof(acc) - XXH_SECRET_MERGEACCS_START;
        xxh_u64 const lowStart  = (xxh_u64)len * XXH_PRIME64_1;
        xxh_u64 const highStart = ~((xxh_u64)len * XXH_PRIME64_2);
        XXH128_hash_t result;
        result.low64  = XXH3_mergeAccs(acc, lowKey, lowStart);
        result.high64 = XXH3_mergeAccs(acc, highKey, highStart);
        return result;
    }
}


/*
 * Long-input 128-bit hash using a caller-provided secret.
 * Matches the XXH3_hashLong128_f signature; `seed64` is deliberately ignored.
 * Forwards to XXH3_hashLong_128b_internal() with the accumulate/scramble
 * implementations selected at compile time.
 * It's important for performance that XXH3_hashLong is not inlined.
 */
XXH_NO_INLINE XXH128_hash_t
XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
                              XXH64_hash_t seed64,
                              const void* XXH_RESTRICT secret, size_t secretLen)
{
    (void)seed64;  /* unused: the secret alone keys this variant */
    return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
                                       XXH3_accumulate_512, XXH3_scrambleAcc);
}


/*
 * It's important for performance that XXH3_hashLong is not inlined.
 */

/* Pointer to a long-input 128-bit hashing routine taking
 * (input, len, seed, secret, secretLen), e.g. XXH3_hashLong_128b_withSecret(). */
typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
                                            XXH64_hash_t, const void* XXH_RESTRICT, size_t);

/*
 * Length-based dispatcher for one-shot 128-bit hashing:
 *   len <= 16                -> XXH3_len_0to16_128b
 *   len <= 128               -> XXH3_len_17to128_128b
 *   len <= XXH3_MIDSIZE_MAX  -> XXH3_len_129to240_128b
 *   otherwise                -> f_hl128 (long-input routine chosen by the caller)
 * `secretLen` >= XXH3_SECRET_SIZE_MIN is a contract pre-condition: it is only
 * asserted, because a real check would add a branch to every hash.
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH3_128bits_internal(const void* input, size_t len,
                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
                      XXH3_hashLong128_f f_hl128)
{
    const xxh_u8* const bytes = (const xxh_u8*)input;
    const xxh_u8* const key   = (const xxh_u8*)secret;

    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);

    if (len > XXH3_MIDSIZE_MAX)
        return f_hl128(input, len, seed64, secret, secretLen);
    if (len > 128)
        return XXH3_len_129to240_128b(bytes, len, key, secretLen, seed64);
    if (len > 16)
        return XXH3_len_17to128_128b(bytes, len, key, secretLen, seed64);
    return XXH3_len_0to16_128b(bytes, len, key, seed64);
}


/* ===   Public XXH128 API   === */



/*
 * One-shot 128-bit hash of `input` (len bytes) keyed by a caller-provided
 * secret. The seed is fixed to 0. `secretSize` must be at least
 * XXH3_SECRET_SIZE_MIN (only asserted downstream).
 */
XXH128_hash_t
XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
{
   return XXH3_128bits_internal(input, len, 0,
                                 (const xxh_u8*)secret, secretSize,
                                 XXH3_hashLong_128b_withSecret);
}



/* ===   XXH3 128-bit streaming   === */

/*
 * All the functions are actually the same as for the 64-bit streaming variant.
 * The only difference is the finalization routine.
 */

/*
 * Resets a streaming state for 128-bit hashing. The state layout is shared
 * with the 64-bit variant, so this simply forwards to
 * XXH3_64bits_reset_internal() with the same arguments.
 */
static void
XXH3_128bits_reset_internal(XXH3_state_t* statePtr,
                            XXH64_hash_t seed,
                            const void* secret, size_t secretSize)
{
    XXH3_64bits_reset_internal(statePtr, seed, secret, secretSize);
}

/*
 * Resets `statePtr` for a new 128-bit streaming hash with seed 0 and the
 * default secret (XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE bytes).
 * Returns XXH_ERROR when statePtr is NULL, XXH_OK otherwise.
 */
XXH_errorcode
XXH3_128bits_reset(XXH3_state_t* statePtr)
{
    if (statePtr != NULL) {
        XXH3_128bits_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
        return XXH_OK;
    }
    return XXH_ERROR;
}

/*
 * Feeds `len` bytes of `input` into a 128-bit streaming state.
 * Forwards to XXH3_update() with the accumulate/scramble implementations
 * selected at compile time and returns its status.
 */
XXH_errorcode
XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
{
    return XXH3_update(state, (const xxh_u8*)input, len,
                       XXH3_accumulate_512, XXH3_scrambleAcc);
}

/*
 * Returns the 128-bit hash of everything fed to `state` so far; the state is
 * not modified.
 *  - totalLen <= XXH3_MIDSIZE_MAX: all the input is still in state->buffer and
 *    is hashed in one shot with XXH3_128bits_withSecret(). This build has no
 *    seeded short path: a non-zero seed prints a GURU message and calls seppuku().
 *  - otherwise: XXH3_digest_long() finalizes a copy of the accumulators, which
 *    are then merged into the low and high halves.
 * The external secret is used when set, the state's customSecret otherwise.
 */
XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
{
    const unsigned char* const secret = (state->extSecret != NULL) ? state->extSecret : state->customSecret;

    if (state->totalLen <= XXH3_MIDSIZE_MAX) {
        /* len <= XXH3_MIDSIZE_MAX : short code */
        if (state->seed)
        {
            myprintf("00010! GURU XXH NO SEED!\n");
            seppuku();
        }
        return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                       secret, state->secretLimit + XXH_STRIPE_LEN);
    }

    {   XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
        XXH128_hash_t result;
        XXH3_digest_long(acc, state, secret);
        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
        result.low64  = XXH3_mergeAccs(acc,
                                       secret + XXH_SECRET_MERGEACCS_START,
                                       (xxh_u64)state->totalLen * XXH_PRIME64_1);
        result.high64 = XXH3_mergeAccs(acc,
                                       secret + state->secretLimit + XXH_STRIPE_LEN
                                              - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
                                       ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
        return result;
    }
}

/* 128-bit utility functions */


/*======   Canonical representation   ======*/

/* Pop our optimization override from above */
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
#  pragma GCC pop_options
#endif

/// LICENSE_END.14


/// incapsulate Yann's xxhash like Brumme's (not BIG ENDIAN compatible)
class XXHash64
{
public:
	explicit XXHash64(uint64_t seed)
	{
		XXH64_hash_t seme=seed;
		state = XXH64_createState();
		assert(state != NULL && "Out of memory!");
		XXH64_reset(state,seme);
	}
	bool add(const void* input, uint64_t length)
	{
		if (!input || length == 0)
			return false;
		XXH64_update(state, input,length);
        return true;
	}
	uint64_t hash() const
	{
		return XXH64_digest(state);
	}
	string getHash()
	{
		return bin2hex_64(XXH64_digest(state));
	}
	~XXHash64()
    {
		
        if (state != nullptr)
        {
			///myprintf("23865: DESTROY XXHASH64\n");
            franz_free(state);
            state=nullptr;
        }
    }


private:
	XXH64_state_t*	state;
};

// File-scope XXHash64 accumulators, all seeded with 0.
// Where they are fed and read is outside this part of the file.
#ifdef SERVER
XXHash64 g_socket_hash(0);	// only exists in SERVER builds
#endif


XXHash64 	g_franzhash_file(0);
int64_t  	g_franzhash_file_bytes=0;	// presumably the byte count fed to g_franzhash_file -- confirm at the call sites
XXHash64 	g_franzhash_index(0);
char		g_franzhash_104[104];		// 104-byte scratch buffer; contents are defined by its users

// Base64 reverse lookup table, indexed by (character - 43), 43 being '+'.
// Covers '+' (index 0) up to 'z' (index 79): each entry is the 6-bit value of
// the character, or -1 when the character is not a base64 digit
// (this includes '=', which callers must handle separately).
int b64invs[] = { 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58,
	59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5,
	6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
	21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
	29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
	43, 44, 45, 46, 47, 48, 49, 50, 51 };
// Returns 1 when i_c may appear in a base64 (MIME) string:
// a digit, an ASCII letter, '+', '/' or the padding character '='.
// Returns 0 for anything else.
int ismime(char i_c)
{
	switch (i_c)
	{
		case '+':
		case '/':
		case '=':
			return 1;
		default:
			break;
	}
	const int is_digit=(i_c>='0') && (i_c<='9');
	const int is_upper=(i_c>='A') && (i_c<='Z');
	const int is_lower=(i_c>='a') && (i_c<='z');
	return (is_digit || is_upper || is_lower) ? 1 : 0;
}
// Number of bytes a base64 string decodes to: 3 bytes for every complete group
// of 4 characters, minus one for each trailing '='.
// Returns 0 for a NULL pointer. The input must be NUL-terminated (strlen is used).
// No validation is done: on malformed input with more trailing '=' than
// len/4*3 the unsigned result wraps around.
size_t mimesize(const char *i_input)
{
	if (i_input==NULL)
		return 0;
	const size_t length=strlen(i_input); // risky!
	size_t decoded=length/4*3;
	size_t pos=length;
	while ((pos>0) && (i_input[pos-1]=='='))
	{
		decoded--;
		pos--;
	}
	return decoded;
}
// Decodes the NUL-terminated base64 string i_in into o_out.
//   i_in   : base64 text; its length must be a multiple of 4
//   o_out  : destination buffer
//   outlen : capacity of o_out in bytes, must be >= mimesize(i_in)
// Returns 1 on success (an empty string succeeds and writes nothing),
// 0 on a NULL pointer, a bad length, an illegal character, misplaced
// padding or a destination that is too small.
// '=' is only accepted as padding, i.e. as the last or the last two
// characters of the string: anywhere else it would be looked up as -1 in
// b64invs and corrupt the output.
int mime2binary(const char *i_in, unsigned char *o_out, size_t outlen)
{
	if ((i_in==NULL) || (o_out==NULL))
		return 0;
	const size_t lunghezza=strlen(i_in);
	if (lunghezza % 4 != 0)
		return 0;

	// validate the alphabet and the position of the padding
	size_t padding=0;
	for (size_t i=0;i<lunghezza;i++)
	{
		if (!ismime(i_in[i]))
			return 0;
		if (i_in[i]=='=')
		{
			if (lunghezza-i>2)
				return 0;	// '=' before the last two characters
			padding++;
		}
		else
		if (padding>0)
			return 0;		// data after padding, e.g. "QQ=Q"
	}

	// after validation mimesize() is exact (at most two trailing '=')
	if (outlen<mimesize(i_in))
		return 0;

	for (size_t i=0,j=0;i<lunghezza;i+=4,j+=3)
	{
		const int pad3=(i_in[i+2]=='=');
		const int pad4=(i_in[i+3]=='=');
		// unsigned accumulator: 4 x 6 bits, never shifts a negative value
		unsigned int temp=(unsigned int)b64invs[i_in[i]-43];
		temp=(temp<<6) | (unsigned int)b64invs[i_in[i+1]-43];
		temp=pad3 ? (temp<<6) : ((temp<<6) | (unsigned int)b64invs[i_in[i+2]-43]);
		temp=pad4 ? (temp<<6) : ((temp<<6) | (unsigned int)b64invs[i_in[i+3]-43]);
		o_out[j]=(unsigned char)((temp>>16) & 0xFF);
		if (!pad3)
			o_out[j+1]=(unsigned char)((temp>>8) & 0xFF);
		if (!pad4)
			o_out[j+2]=(unsigned char)(temp & 0xFF);
	}
	return 1;
}


// Error hook for libzpaq and the rest of the program: never returns.
// Stores msg in g_exec_text, then throws std::bad_alloc when the message
// contains "ut of memory", std::runtime_error(msg) otherwise.
void libzpaq::error(const char* msg) {
	g_exec_text=msg;
	const bool memory_exhausted=(strstr(msg, "ut of memory")!=NULL);
	if (memory_exhausted)
		throw std::bad_alloc();
	throw std::runtime_error(msg);
}
using libzpaq::error;
// Portable thread types and functions for Windows and Linux. Use like this:
//
// // Create mutex for locking thread-unsafe code
// Mutex mutex;            // shared by all threads
// init_mutex(mutex);      // initialize in unlocked state
// Semaphore sem(n);       // n >= 0 is initial state
//
// // Declare a thread function
// ThreadReturn thread(void *arg) {  // arg points to in/out parameters
//   lock(mutex);          // wait if another thread has it first
//   release(mutex);       // allow another waiting thread to continue
//   sem.wait();           // wait until n>0, then --n
//   sem.signal();         // ++n to allow waiting threads to continue
//   return 0;             // must return 0 to exit thread
// }
//
// // Start a thread
// ThreadID tid;
// run(tid, thread, &arg); // runs in parallel
// join(tid);              // wait for thread to return
// destroy_mutex(mutex);   // deallocate resources used by mutex
// sem.destroy();          // deallocate resources used by semaphore
#ifdef unix
typedef void* ThreadReturn;                                // job return type
typedef pthread_t ThreadID;                                // job ID type
// Starts f(arg) in a new thread and stores its ID in tid.
// Calls error() when the thread cannot be created, like the Windows variant:
// otherwise tid would be left indeterminate and a later join(tid) undefined.
void run(ThreadID& tid, ThreadReturn(*f)(void*), void* arg)// start job
{
  if (pthread_create(&tid, NULL, f, arg)!=0)
    error("pthread_create failed");
}
// Blocks until thread tid has finished; its return value is discarded
void join(ThreadID tid) {pthread_join(tid, NULL);}         // wait for job
// Mutex helpers: thin wrappers over pthread_mutex_* with default attributes.
// The pthread return codes are not checked.
typedef pthread_mutex_t Mutex;                             // mutex type
void init_mutex(Mutex& m) {pthread_mutex_init(&m, 0);}     // init mutex
void lock(Mutex& m) {pthread_mutex_lock(&m);}              // wait for mutex
void release(Mutex& m) {pthread_mutex_unlock(&m);}         // release mutex
void destroy_mutex(Mutex& m) {pthread_mutex_destroy(&m);}  // destroy mutex
// Counting semaphore built on a pthread mutex + condition variable.
// Life cycle: default-construct (count is -1 = "not initialized"),
// init(n), any number of wait()/signal() calls, then destroy().
class Semaphore {
public:
  Semaphore() {sem=-1;}

  // Set the initial count to n (n >= 0). Must be called exactly once
  // before wait()/signal().
  void init(int n) {
    assert(n>=0);
    assert(sem==-1);
    pthread_cond_init(&cv, 0);
    pthread_mutex_init(&mutex, 0);
    sem=n;
  }

  // Release the mutex and the condition variable.
  void destroy() {
    assert(sem>=0);
    pthread_mutex_destroy(&mutex);
    pthread_cond_destroy(&cv);
  }

  // Wait until the count is positive, then decrement it.
  // Returns 0 on success, or the pthread_cond_wait() error code, in which
  // case the count is left untouched.
  int wait() {
    assert(sem>=0);
    pthread_mutex_lock(&mutex);
    int r=0;
    // Re-check the predicate after every wake-up: pthread_cond_wait() may
    // return spuriously, and another thread may have consumed the count
    // between the signal and this thread re-acquiring the mutex.
    while (sem==0 && r==0)
      r=pthread_cond_wait(&cv, &mutex);
    if (r==0) {
      assert(sem>0);
      --sem;
    }
    pthread_mutex_unlock(&mutex);
    return r;
  }

  // Increment the count and wake one waiting thread, if any.
  void signal() {
    assert(sem>=0);
    pthread_mutex_lock(&mutex);
    ++sem;
    pthread_cond_signal(&cv);
    pthread_mutex_unlock(&mutex);
  }
private:
  pthread_cond_t cv;  // to signal FINISHED
  pthread_mutex_t mutex; // protects cv
  int sem;  // semaphore count
};

#else  // Windows
typedef DWORD ThreadReturn;   // job return type
typedef HANDLE ThreadID;      // job ID type (thread handle)

// Start f(arg) on a new Windows thread and store its handle in tid.
// Calls error() when the thread cannot be created.
void run(ThreadID& tid, ThreadReturn(*f)(void*), void* arg)
{
  LPTHREAD_START_ROUTINE entry=(LPTHREAD_START_ROUTINE)f;
  tid=CreateThread(NULL, 0, entry, arg, 0, NULL);
  if (tid==NULL)
    error("CreateThread failed");
}

// Block until the thread behind tid has finished.
void join(ThreadID& tid)
{
  WaitForSingleObject(tid, INFINITE);
}
typedef HANDLE Mutex;         // mutex type (Windows mutex handle)

// Create an unnamed, initially unowned mutex and store its handle in m.
void init_mutex(Mutex& m)
{
  m=CreateMutex(NULL, FALSE, NULL);
}

// Acquire m, waiting without timeout.
void lock(Mutex& m)
{
  WaitForSingleObject(m, INFINITE);
}

// Release ownership of m.
void release(Mutex& m)
{
  ReleaseMutex(m);
}

// Close the handle of m.
void destroy_mutex(Mutex& m)
{
  CloseHandle(m);
}
// Counting semaphore wrapping a Windows semaphore handle.
// Life cycle: default-construct (no handle), init(n), wait()/signal(),
// then destroy().
class Semaphore {
public:
  enum {MAXCOUNT=2000000000};   // upper bound handed to CreateSemaphore

  Semaphore(): h(NULL) {}

  // Create the semaphore with initial count n; must not be initialized yet.
  void init(int n) {
    assert(!h);
    h=CreateSemaphore(NULL, n, MAXCOUNT, NULL);
  }

  // Close the semaphore handle.
  void destroy() {
    assert(h);
    CloseHandle(h);
  }

  // Wait (no timeout) for the count to become positive and take one unit.
  // Returns the WaitForSingleObject() result.
  int wait() {
    assert(h);
    return WaitForSingleObject(h, INFINITE);
  }

  // Add one unit to the count.
  void signal() {
    assert(h);
    ReleaseSemaphore(h, 1, NULL);
  }
private:
  HANDLE h;  // Windows semaphore
};
#endif



#ifdef _WIN32
// Milliseconds from GetTickCount(), widened to 64 bits.
// When the 32-bit tick counter reads lower than g_start it is taken to
// have wrapped once, and 2^32 is added to keep the value increasing.
int64_t mtime()
{
  const int64_t ticks=GetTickCount();
  return (ticks<g_start) ? ticks+0x100000000LL : ticks;
}

#ifdef _WIN32
// Enable (fEnable != 0) or disable the privilege named szPrivilege on the
// token of the current process.
// Returns S_OK when every API call reports success, ERROR_FUNCTION_FAILED
// otherwise (after printing which step failed).
// NOTE(review): only the BOOL result of AdjustTokenPrivileges() is checked;
// GetLastError() is not consulted afterwards.
HRESULT ModifyPrivilege(
    IN LPCTSTR szPrivilege,
    IN BOOL fEnable)
{
    HANDLE token = NULL;
    const BOOL opened = OpenProcessToken(GetCurrentProcess(),
                                         TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY,
                                         &token);
    if (!opened)
    {
        myprintf("00011! Failed OpenProcessToken\n");
        return ERROR_FUNCTION_FAILED;
    }

    LUID privilege_id;
    if (!LookupPrivilegeValue(NULL, szPrivilege, &privilege_id))
    {
        CloseHandle(token);
        myprintf("00012!		Failed LookupPrivilegeValue\n");
        return ERROR_FUNCTION_FAILED;
    }

    // Single-entry privilege list describing the requested state
    TOKEN_PRIVILEGES wanted;
    wanted.PrivilegeCount = 1;
    wanted.Privileges[0].Luid = privilege_id;
    wanted.Privileges[0].Attributes = (fEnable ? SE_PRIVILEGE_ENABLED : 0);

    HRESULT outcome = S_OK;
    if (!AdjustTokenPrivileges(token, FALSE, &wanted, 0, NULL, NULL))
    {
        myprintf("00013! Failed AdjustTokenPrivileges\n");
        outcome = ERROR_FUNCTION_FAILED;
    }
    CloseHandle(token);
    return outcome;
}
/* NTFS reparse point definitions */
/* Constants from http://msdn.microsoft.com/en-us/library/dd541667.aspx */
/* Some, but not all, of them also defined in recent versions of winnt.h. */
/* All seem to come from NT DDK's ntifs.h, for installable file system drivers. */
/* Since the list varies a lot, redefine them one by one as needed */
/* Bit 31 = Tag owned by Microsoft
   Bit 30 = Reserved for Microsoft. Must be 0 for non-MS tags.
   Bit 29 = Surrogate bit. Points to another file or directory.
   Bit 28 = Directory bit. Any directory with this reparse tag can have children.
   Bits 16-27: Invalid and must be 0. */
/* Reparse tags, with the exception of IO_REPARSE_TAG_SYMLINK,
   are processed on the server and are not processed by a client after transmission over the wire. */
/* See https://github.com/prsyahmi/GpuRamDrive/blob/master/GpuRamDrive/3rdparty/inc/imdisk/ntumapi.h
   for a list of non-Microsoft reparse tags */
// sizeof(REPARSE_READ_BUFFER) minus its one-element DataBuffer placeholder
#define REPARSE_READ_BUFFER_HEADER_SIZE (sizeof(REPARSE_READ_BUFFER) - sizeof(UCHAR))
// Layout for reading reparse data of a symbolic link.
// PathBuffer is a one-element placeholder for the variable-length name data
// that the Offset/Length fields refer to.
typedef struct _REPARSE_SYMLINK_READ_BUFFER { // For tag IO_REPARSE_TAG_SYMLINK
  DWORD  ReparseTag;
  WORD   ReparseDataLength;
  WORD   Reserved;
  WORD   SubstituteNameOffset;
  WORD   SubstituteNameLength;
  WORD   PrintNameOffset;
  WORD   PrintNameLength;
  ULONG  Flags;
  WCHAR  PathBuffer[1];
} SYMLINK_READ_BUFFER, *PSYMLINK_READ_BUFFER;
// sizeof(SYMLINK_READ_BUFFER) minus the one-element PathBuffer placeholder
#define SYMLINK_READ_BUFFER_HEADER_SIZE (sizeof(SYMLINK_READ_BUFFER) - sizeof(WCHAR))
// Layout for reading reparse data of a mount point (junction).
// Same fields as SYMLINK_READ_BUFFER except that there is no Flags member.
typedef struct _REPARSE_MOUNTPOINT_READ_BUFFER { // For tag IO_REPARSE_TAG_MOUNT_POINT, aka. junctions
  DWORD  ReparseTag;
  WORD   ReparseDataLength;
  WORD   Reserved;
  WORD   SubstituteNameOffset;
  WORD   SubstituteNameLength;
  WORD   PrintNameOffset;
  WORD   PrintNameLength;
  WCHAR  PathBuffer[1];
} MOUNTPOINT_READ_BUFFER, *PMOUNTPOINT_READ_BUFFER;
// sizeof(MOUNTPOINT_READ_BUFFER) minus the one-element PathBuffer placeholder
#define MOUNTPOINT_READ_BUFFER_HEADER_SIZE (sizeof(MOUNTPOINT_READ_BUFFER) - sizeof(WCHAR))
// Layout for writing a mount point reparse buffer.
// NOTE(review): ReparseDataLength is a DWORD here but a WORD in the read
// layouts above, so the field names do not line up one-to-one with
// MOUNTPOINT_READ_BUFFER - confirm against the code that fills this struct.
typedef struct _REPARSE_MOUNTPOINT_WRITE_BUFFER {
  DWORD  ReparseTag;
  DWORD  ReparseDataLength;
  WORD   Reserved;
  WORD   ReparseTargetLength;
  WORD   ReparseTargetMaximumLength;
  WORD   Reserved1;
  WCHAR  ReparseTarget[1];
} MOUNTPOINT_WRITE_BUFFER, *PMOUNTPOINT_WRITE_BUFFER;
// sizeof(MOUNTPOINT_WRITE_BUFFER) minus the one-element ReparseTarget placeholder
#define MOUNTPOINT_WRITE_BUFFER_HEADER_SIZE (sizeof(MOUNTPOINT_WRITE_BUFFER) - sizeof(WCHAR))
// Universal Windows Platform (UWP) Application Execution Links
// Ref: https://www.tiraniddo.dev/2019/09/overview-of-windows-execution-aliases.html
// Layout for reading the reparse data of an application execution link.
// StringList is a one-element placeholder for the variable-length multistring.
typedef struct _REPARSE_APPEXECLINK_READ_BUFFER { // For tag IO_REPARSE_TAG_APPEXECLINK
  DWORD  ReparseTag;
  WORD   ReparseDataLength;
  WORD   Reserved;
  ULONG  Version;	// Currently version 3
  WCHAR  StringList[1];	// Multistring (Consecutive strings each ending with a NUL)
  /* There are normally 4 strings here. Ex:
	Package ID:	L"Microsoft.WindowsTerminal_8wekyb3d8bbwe"
	Entry Point:	L"Microsoft.WindowsTerminal_8wekyb3d8bbwe!App"
	Executable:	L"C:\Program Files\WindowsApps\Microsoft.WindowsTerminal_1.4.3243.0_x64__8wekyb3d8bbwe\wt.exe"
	Applic. Type:	L"0"	// Integer as ASCII. "0" = Desktop bridge application; Else sandboxed UWP application
  */
} APPEXECLINK_READ_BUFFER, *PAPPEXECLINK_READ_BUFFER;
// LinuX Sub-System (LXSS) Symbolic Links
// Layout for reading the reparse data of a Linux-subsystem symbolic link.
// PathBuffer is a one-element placeholder for the variable-length target path.
typedef struct _REPARSE_LX_SYMLINK_BUFFER {
  DWORD  ReparseTag;
  WORD	 ReparseDataLength;
  WORD	 Reserved;
  DWORD  FileType; 	// Value is apparently always 2 for symlinks.
  char   PathBuffer[1];	// POSIX path of symlink. UTF-8. Not \0 terminated.
} LX_SYMLINK_READ_BUFFER, *PLX_SYMLINK_READ_BUFFER;
#endif

	#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
		#define ENABLE_VIRTUAL_TERMINAL_PROCESSING  0x0004
	#endif
static HANDLE stdoutHandle;
static DWORD outModeInit;
void setupConsole(void)
{
	if (flagnoconsole)
		return;
	DWORD outMode 	= 0;
	stdoutHandle 	= GetStdHandle(STD_OUTPUT_HANDLE);
	if(stdoutHandle == INVALID_HANDLE_VALUE)
		exit(GetLastError());
	if(!GetConsoleMode(stdoutHandle, &outMode))
		exit(GetLastError());
	outModeInit = outMode;
	outMode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING;
	if(!SetConsoleMode(stdoutHandle, outMode))
		exit(GetLastError());
}
void restoreConsole(void)
{
	if (flagnoconsole)
		return;
	if (flagsilent)
		return;
	printf("\x1b[0m");
	if(!SetConsoleMode(stdoutHandle, outModeInit))
		exit(GetLastError());
}
// In Windows, convert 16-bit wide string to UTF-8 and \ to /
bool windows7_or_above=false; //windows version (for using FindFirstFileExW)
// Convert a NUL-terminated 16-bit wide string to UTF-8, turning every
// backslash into a forward slash. A NULL input gives an empty string.
// Each 16-bit unit is encoded on its own as 1, 2 or 3 bytes.
string wtou(const wchar_t* s) {
  assert(sizeof(wchar_t)==2);  // Not true in Linux
  assert((wchar_t)(-1)==65535);
  string utf8;
  if (s==NULL)
    return utf8;
  while (*s) {
    const wchar_t c=*s++;
    if (c=='\\')
      utf8+='/';
    else if (c<128)
      utf8+=char(c);
    else if (c<2048) {
      utf8+=char(192+c/64);
      utf8+=char(128+c%64);
    }
    else {
      utf8+=char(224+c/4096);
      utf8+=char(128+c/64%64);
      utf8+=char(128+c%64);
    }
  }
  return utf8;
}
/*
string get_good_filename(string i_filename) 
{
	WIN32_FIND_DATA ffd;
	if ( (i_filename.size()>0) && (isdirectory(i_filename)))
	i_filename+="/";
	
	HANDLE h=FindFirstFile(utow(i_filename.c_str()).c_str(), &ffd);
	if (h!=INVALID_HANDLE_VALUE)
	{
		string kz=wtou(ffd.cFileName);
		myprintf("kkkkk %s\n",kz.c_str());
		FindClose(h);
		return wtou(ffd.cFileName);
	}
	return i_filename;
}
*/
string win_getcomputername()
{
	wchar_t buffer[256];
	DWORD 	size=256;
	string risultato="";
	if (GetComputerName(buffer,&size))
		risultato=wtou(buffer);
	return risultato;
}
string win_getusername()
{
	wchar_t buffer[256];
	DWORD 	size=256;
	string risultato="";
	if (GetUserName(buffer,&size))
		risultato=wtou(buffer);
	return risultato;
}
string my_realpath(std::string const& i_path)
{
	if (i_path=="")
			return "";
	char linkbuffer[66000]={0};
	size_t	linksize=66000;
    HANDLE h = CreateFileW(utow(i_path.c_str()).c_str(), 0, 0, NULL, OPEN_EXISTING, FILE_FLAG_OPEN_REPARSE_POINT, NULL);
    char buffer[MAXIMUM_REPARSE_DATA_BUFFER_SIZE];
    DWORD dwBytesReturned = 0;
    DeviceIoControl(h, FSCTL_GET_REPARSE_POINT, NULL, 0, buffer, sizeof(buffer), &dwBytesReturned, 0);
    typedef struct
    {
        ULONG ReparseTag;
        USHORT ReparseDataLength;
        USHORT Reserved;
        union
        {
            struct
            {
                USHORT SubstituteNameOffset;
                USHORT SubstituteNameLength;
                USHORT PrintNameOffset;
                USHORT PrintNameLength;
                ULONG Flags;
                WCHAR PathBuffer[1];
            } SymbolicLinkReparseBuffer;
            struct
            {
                USHORT SubstituteNameOffset;
                USHORT SubstituteNameLength;
                USHORT PrintNameOffset;
                USHORT PrintNameLength;
                WCHAR PathBuffer[1];
            } MountPointReparseBuffer;
            struct
            {
                UCHAR  DataBuffer[1];
            } GenericReparseBuffer;
        };
    } REPARSE_DATA_BUFFER;
    REPARSE_DATA_BUFFER* pRDB = reinterpret_cast<REPARSE_DATA_BUFFER*>(buffer);
    if (pRDB->ReparseTag == IO_REPARSE_TAG_SYMLINK)
    {
        int nameLength = pRDB->SymbolicLinkReparseBuffer.SubstituteNameLength / sizeof(wchar_t);
        wchar_t* pName = (wchar_t*)((char*)pRDB->SymbolicLinkReparseBuffer.PathBuffer + pRDB->SymbolicLinkReparseBuffer.SubstituteNameOffset);
        WideCharToMultiByte(CP_UTF8, 0, pName, nameLength, linkbuffer, linksize, NULL, NULL);
        return linkbuffer;
    }
    CloseHandle(h);
    return "";
}
// Convert unicode_size wide characters starting at unicode to the active
// ANSI code page (CP_ACP) and store them in ansi.
// Returns 0 on success, ERROR_INVALID_PARAMETER for a null or empty input
// (ansi is left untouched in that case), or the GetLastError() code of the
// failing WideCharToMultiByte call.
uint32_t convert_unicode_to_ansi_string(std::string& ansi,const wchar_t* unicode,const size_t unicode_size)
	{
		if ((unicode==nullptr) || (unicode_size==0))
			return ERROR_INVALID_PARAMETER;
		ansi.clear();
		const int source_cch=static_cast<int>(unicode_size);
		// First pass: ask how many bytes the conversion needs
		const int required_cch=::WideCharToMultiByte(CP_ACP,0,unicode,source_cch,nullptr,0,nullptr,nullptr);
		if (required_cch==0)
			return ::GetLastError();
		ansi.resize(required_cch);
		// Second pass: convert straight into the string storage
		const int written=::WideCharToMultiByte(CP_ACP,0,unicode,source_cch,&ansi[0],static_cast<int>(ansi.size()),nullptr,nullptr);
		if (written==0)
			return ::GetLastError();
		return 0;
	}
// Convert utf8_size bytes of UTF-8 starting at utf8 to a wide string stored
// in unicode; invalid sequences make the call fail (MB_ERR_INVALID_CHARS).
// Returns 0 on success, ERROR_INVALID_PARAMETER for a null or empty input
// (unicode is left untouched in that case), or the GetLastError() code of
// the failing MultiByteToWideChar call.
uint32_t convert_utf8_to_unicode_string(std::wstring& unicode,const char* utf8,const size_t utf8_size)
	{
		if ((utf8==nullptr) || (utf8_size==0))
			return ERROR_INVALID_PARAMETER;
		unicode.clear();
		const int source_cb=static_cast<int>(utf8_size);
		// First pass: ask how many wide characters the conversion needs
		const int required_cch=::MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,utf8,source_cb,nullptr,0);
		if (required_cch==0)
			return ::GetLastError();
		unicode.resize(required_cch);
		// Second pass: convert straight into the string storage
		const int written=::MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,utf8,source_cb,&unicode[0],static_cast<int>(unicode.size()));
		if (written==0)
			return ::GetLastError();
		return 0;
	}
// Windows: double conversion, UTF-8 -> UTF-16 -> ANSI code page.
// The error codes of both steps are ignored: whatever the second step left
// in its output string (possibly empty) is returned.
	std::string utf8toansi(const std::string & utf8)
	{
		std::wstring wide;
		convert_utf8_to_unicode_string(wide, utf8.c_str(), utf8.size());
		std::string narrow;
		convert_unicode_to_ansi_string(narrow, wide.c_str(), wide.size());
		return narrow;
	}
int erredbarras(const std::wstring &wi_path)
{
	std::wstring wpattern = wi_path+L"\\*.*";
	const std::string s_pattern(wpattern.begin(),wpattern.end());
	if (flagdebug)
		myprintf("00014: get handle FOR %s\n",s_pattern.c_str());
	int secondi=(mtime()-g_startrd)/1000;
	if (secondi!=g_rd_ultimotempo)
	{
		g_rd_ultimotempo=secondi;
		if (g_rd_expected)
			myprintf("00015: Deleted objects %12s of (~) %12s @ %s/s\r",migliaia(g_rd),migliaia2(g_rd_expected),migliaia3(g_rd/secondi));
		else
			myprintf("00016: Deleted objects %12s\r",migliaia(g_rd));
	}
	WIN32_FIND_DATAW findfiledata;
	HANDLE myhandle=FindFirstFileW(wpattern.c_str(),&findfiledata);
	if (myhandle==INVALID_HANDLE_VALUE)
	{
		if (flagdebug)
			myprintf("00017: Invalid handle %s\n",s_pattern.c_str());
		return 0;
	}
	do
	{
		std::string t=wtou(findfiledata.cFileName);
		if ((t!=".") && (t!=".."))
		{
			std::wstring wfilepath=wi_path+L"\\"+findfiledata.cFileName;
			const std::string s_wfilepath(wfilepath.begin(),wfilepath.end());
			if (flagdebug3)
				myprintf("00018: Working on %s\n",s_wfilepath.c_str());
			if (findfiledata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
			{
				if (flagdebug3)
				{
					myprintf("\n");
					myprintf("00019: recurse on %s\n",s_wfilepath.c_str());
				}
				int myresult=erredbarras(wfilepath);
				if (myresult)
					return myresult;
			}
			else
			{
				if (flagdebug3)
					myprintf("00020: set attribute on file %s\n",s_wfilepath.c_str());
				if (SetFileAttributesW(wfilepath.c_str(),FILE_ATTRIBUTE_NORMAL) == FALSE)
				{
					if (flagdebug)
						myprintf("00021! ERROR cannot change attr of %s\n",s_wfilepath.c_str());
					return GetLastError();
				}
				if (flagdebug3)
					myprintf("00022: try to delete file %s\n",s_wfilepath.c_str());
				if (DeleteFileW(wfilepath.c_str())==FALSE)
				{
					if (flagdebug)
						myprintf("00023! ERROR highlander file %s\n",s_wfilepath.c_str());
					return GetLastError();
				}
				else
					g_rd++;
			}
		}
	} while(FindNextFile(myhandle,&findfiledata)==TRUE);
	if (myhandle)
		FindClose(myhandle);
	DWORD myerror=GetLastError();
	if (myerror==ERROR_NO_MORE_FILES)
	{
		const std::string s_wipath(wi_path.begin(), wi_path.end());
		if (flagdebug3)
			myprintf("00024: Change folder attr  %s\n",s_wipath.c_str());
		if (SetFileAttributesW(wi_path.c_str(),FILE_ATTRIBUTE_NORMAL)==FALSE)
		{
			if (flagdebug)
				myprintf("00025! ERROR cannot change folder attr %s\n",s_wipath.c_str());
			return GetLastError();
		}
		if (flagdebug3)
			myprintf("00026: RemoveDirectory  %s\n",s_wipath.c_str());
		if (RemoveDirectoryW(wi_path.c_str())==FALSE)
		{
			if (flagdebug)
				myprintf("00027! ERROR highlander dir %s\n",s_wipath.c_str());
			return GetLastError();
		}
		else
		g_rd++;
	}
	else
		return myerror;
	return 0;
}


int64_t	getwinattributes(string i_filename)
{
	WIN32_FIND_DATA ffd;
    if ( (i_filename.size()>0) && (isdirectory(i_filename)))
		i_filename+="*";
	HANDLE h=FindFirstFile(utow(i_filename.c_str()).c_str(), &ffd);
	if (h!=INVALID_HANDLE_VALUE)
	{
		FindClose(h);
		return ffd.dwFileAttributes;
	}
	return 0;
}
/// reworked https://github.com/JFLarvoire/SysToolsLib/blob/master/C/MsvcLibX/src/readlink.c
/// Generic view of a reparse point buffer: tag, data length, reserved word,
/// then the tag-specific payload (DataBuffer is a one-byte placeholder).
typedef struct _REPARSE_READ_BUFFER
{
	DWORD  ReparseTag;
	WORD   ReparseDataLength;
	WORD   Reserved;
	UCHAR  DataBuffer[1];
} REPARSE_READ_BUFFER, *PREPARSE_READ_BUFFER;
// Read the reparse point data of i_path into i_buf (capacity i_bufsize).
// Outputs: o_tag (reparse tag), o_byteletti (the ReparseDataLength field of
// the buffer), o_type (human readable description of the tag).
// With i_flagdebug a hex/ASCII dump of the payload is printed.
// NOTE(review): as written the function clears the outputs, prints
// "00028: REPPAAA" and returns true immediately; every statement after that
// first return is unreachable. This looks like a leftover debugging stub -
// confirm before relying on any of the outputs.
bool getreparsepointW(bool i_flagdebug,const string i_path, char *i_buf, size_t i_bufsize,size_t& o_byteletti,DWORD& o_tag,string& o_type)
{
	o_byteletti	=0;
	o_tag		=0;
	o_type		="";
	myprintf("00028: REPPAAA\n");
	return true;
	// ---- unreachable from here on (see NOTE above) ----
	wstring wi_path=utow(i_path.c_str());
	PREPARSE_READ_BUFFER pIoctlBuf;
	DWORD attributi = GetFileAttributesW(wi_path.c_str());
	if (attributi==INVALID_FILE_ATTRIBUTES)
	{
		myprintf("00029! failed GetFileAttributesW\n");
		return false;
	}
	if (!(attributi & FILE_ATTRIBUTE_REPARSE_POINT))
	{
		myprintf("00030! fake file is not a reparse point\n");
		return false;
	}
	// Open the reparse point itself, not its target; directories additionally
	// get FILE_FLAG_BACKUP_SEMANTICS
	DWORD flag=FILE_FLAG_OPEN_REPARSE_POINT;
	if (attributi & FILE_ATTRIBUTE_DIRECTORY)
		flag|=FILE_FLAG_BACKUP_SEMANTICS;
	HANDLE h=CreateFileW(wi_path.c_str(),0,FILE_SHARE_READ | FILE_SHARE_WRITE,NULL,OPEN_EXISTING,flag,NULL);
	if (h==INVALID_HANDLE_VALUE)
	{
		myprintf("00031! cannot createfile on reparse point\n");
		return false;
	}
	DWORD byteletti;
 	BOOL fattoio=DeviceIoControl(h,FSCTL_GET_REPARSE_POINT,NULL,0,i_buf,(DWORD)i_bufsize,&byteletti,NULL);
	CloseHandle(h);
	if (!fattoio)
	{
		myprintf("00032! DeviceIoControl kaputt\n");
		return false;
	}
	// Fewer than 8 bytes cannot hold tag + length + reserved
	if (byteletti<8)
	{
		myprintf("00033! something wrong\n");
		return false;
	}
	pIoctlBuf 	= (PREPARSE_READ_BUFFER)i_buf;
	o_tag		=pIoctlBuf->ReparseTag;
	o_byteletti	=pIoctlBuf->ReparseDataLength;
	// Translate the tag into a description
    switch (o_tag)
	{
		case 0x00000000: 	o_type="Reserved0"; break;
		case 0x00000001: 	o_type="Reserved1"; break;
		case 0x00000002: 	o_type="Reserved2"; break;
		case 0xA0000003: 	o_type="Mount point or junction"; break;
		case 0xC0000004: 	o_type="Hierarchical Storage Manager"; break;
		case 0x80000005: 	o_type="Home server drive extender"; break;
		case 0x80000006: 	o_type="Hierarchical Storage Manager Product #2"; break;
		case 0x80000007: 	o_type="Single-instance storage filter driver"; break;
		case 0x80000008: 	o_type="Windows boot Image File"; break;
		case 0x80000009: 	o_type="Cluster Shared Volume"; break;
		case 0x8000000A: 	o_type="Distributed File System"; break;
		case 0x8000000B: 	o_type="Filter manager test harness"; break;
		case 0xA000000C: 	o_type="Symbolic link"; break;
		case 0xA0000010: 	o_type="Internet Information Services cache"; break;
		case 0x80000012: 	o_type="Distributed File System R filter"; break;
		case 0x80000013: 	o_type="Deduplicated file"; break;
		case 0x80000014: 	o_type="NFS symbolic link"; break;
		case 0xC0000014: 	o_type="APPXSTREAM (Not used?)"; break;
		case 0x80000015: 	o_type="Placeholder for a OneDrive file"; break;
		case 0x80000016: 	o_type="Dynamic File filter"; break;
		case 0x80000017: 	o_type="Windows Overlay Filesystem compressed file"; break;
		case 0x80000018: 	o_type="Windows Container Isolation filter"; break;
		case 0xA0000019: 	o_type="NPFS server silo named pipe symbolic link into the host silo"; break;
		case 0x9000001A: 	o_type="Cloud Files filter"; break;
		case 0x8000001B: 	o_type="Application Execution link"; break;
		case 0x9000001C: 	o_type="Projected File System VFS filter, ex for git"; break;
		case 0xA000001D: 	o_type="Linux Sub-System Symbolic Link"; break;
		case 0x8000001E: 	o_type="Azure File Sync (AFS) filter"; break;
		case 0xA000001F: 	o_type="Windows Container Isolation filter tombstone"; break;
		case 0xA0000020: 	o_type="Unhandled Windows Container Isolation filter"; break;
		case 0xA0000021: 	o_type="One Drive (Not used?)"; break;
		case 0xA0000022: 	o_type="Projected File System VFS filter tombstone, ex for git"; break;
		case 0xA0000023: 	o_type="Linux Sub-System Socket"; break;
		case 0xA0000024: 	o_type="Linux Sub-System FIFO"; break;
		case 0xA0000025: 	o_type="Linux Sub-System Character Device"; break;
		case 0xA0000026: 	o_type="Linux Sub-System Block Device"; break;
		case 0xA0000027: 	o_type="Windows Container Isolation filter Link"; break;
		default:			o_type="35701: Microsoft strikes back!"; break;
    }
	if (!i_flagdebug)
		return true;
	// Debug only: dump the payload 16 bytes per row, hex first, then ASCII
    unsigned int ul;
    unsigned int u;
    unsigned int uMax;
	for (ul = 0; ul < (unsigned)(pIoctlBuf->ReparseDataLength); ul += 16)
	{
		myprintf("%08X ", ul);
		uMax = (unsigned)(pIoctlBuf->ReparseDataLength) - ul;
		if (uMax > 16)
			uMax = 16;
      /* Display the hex dump */
		for (u=0; u<16; u++)
		{
			if (!(u&3))
				myprintf(" ");
			if (u < uMax)
				myprintf("%2.2X ", ((unsigned char *)pIoctlBuf->DataBuffer)[ul + u]);
			else
				myprintf("   ");
		}
      /* Display the ASCII characters dump */
		// NOTE(review): unlike the hex loop this one is not limited by uMax,
		// so the last row reads up to 15 bytes past ReparseDataLength
		for (u=0; u<16; u++)
		{
			char c = ((char *)pIoctlBuf->DataBuffer)[ul + u];
			if (!(u&7))
				myprintf(" ");
			if (c < ' ')
				c = ' ';
			if ((unsigned char)c > '\x7F')
				c = ' ';
			myprintf("%c", c);
		}
		myprintf("\n");
    }
	return true;
}
#else // Houston, we have Unix
// Wall-clock time from gettimeofday(), expressed in milliseconds.
int64_t mtime()
{
  struct timeval now;
  gettimeofday(&now, 0);
  const int64_t whole_ms=now.tv_sec*1000LL;
  const int64_t frac_ms=now.tv_usec/1000;
  return whole_ms+frac_ms;
}
// Unix: no code-page conversion is performed, the input is returned as is.
std::string utf8toansi(const std::string & utf8)
{
	std::string unchanged(utf8);
	return unchanged;
}
// Unix counterpart of the Windows console setup: intentionally does nothing.
void setupConsole(void)
{
}
// Unix: print the "reset attributes" escape sequence, unless flagnoconsole
// or flagsilent is set.
void restoreConsole(void)
{
	const bool muted=flagnoconsole || flagsilent;
	if (!muted)
		printf("\x1b[0m");
}
#endif

// Width of the terminal attached to stdout, in columns.
// Windows: width of the console window, or 0 when stdout is not a console
//          (e.g. redirected) and no screen buffer info can be read.
// Others:  TIOCGWINSZ column count capped at 200; 80 when the ioctl fails.
int terminalwidth()
{
#if defined(_WIN32)
    CONSOLE_SCREEN_BUFFER_INFO csbi;
    // Without this check csbi would be read uninitialized on failure
    if (!GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
        return 0;
    return (int) csbi.srWindow.Right - csbi.srWindow.Left + 1;
#else
    struct winsize w;
    int colonna=80;
	if (ioctl(fileno(stdout), TIOCGWINSZ, &w)==0)
		colonna=(w.ws_col);
	if (colonna>200)
		colonna=200;
    return colonna;
#endif
}
// Height of the terminal attached to stdout, in rows.
// Windows: height of the console window, or 0 when stdout is not a console
//          (e.g. redirected) and no screen buffer info can be read.
// Others:  TIOCGWINSZ row count; 30 when the ioctl fails or when more than
//          200 rows are reported.
int terminalheight()
{
#if defined(_WIN32)
    CONSOLE_SCREEN_BUFFER_INFO csbi;
    // Without this check csbi would be read uninitialized on failure
    if (!GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
        return 0;
	return (int)csbi.srWindow.Bottom - csbi.srWindow.Top + 1;
#else
    struct winsize w;
    int riga=30;
	if (ioctl(fileno(stdout), TIOCGWINSZ, &w)==0)
		riga=(w.ws_row);
	if (riga>200)
		riga=30;
    return riga;
#endif
}
// Non-blocking keyboard poll.
// i_thekey==0: returns true when any key is waiting (the key is consumed).
// i_thekey!=0: returns true only when the consumed key equals i_thekey.
// Unix: stdin is switched to non-canonical, no-echo mode for the duration of
// a single read() and restored afterwards. A failed or empty read counts as
// "no key"; if stdin is not a terminal its settings are left alone.
bool iskeypressed(int i_thekey)
{
#ifdef _WIN32
    if (kbhit())
    {
		int keypressed=::getch();
		if (flagdebug)
			printf("21029: WIN32 getch %d %c\n",keypressed,keypressed);
        if (i_thekey==0)
			return true;
		else
			return (i_thekey==keypressed);
    }
	return false;
#else
	struct termios old_t;
	// Only touch (and later restore) the settings if they could be read
	const bool have_tty=(tcgetattr(0,&old_t)==0);
	if (have_tty)
	{
		struct termios new_t=old_t;
		new_t.c_lflag &= ~( ICANON | ECHO );
		new_t.c_cc[VMIN] = 0;
		new_t.c_cc[VTIME] = 0;
		tcsetattr(0,TCSANOW,&new_t);
	}
	char ch=0;
	int letti=read(0,&ch,1);
	if (have_tty)
		tcsetattr(0,TCSANOW,&old_t);
	// read() gives -1 on error and 0 when nothing is available: no key
	if (letti<=0)
		return false;
	if (i_thekey==0)
		return true;
	return (i_thekey==ch);
#endif
}
// Read one key without echo and without waiting for Enter.
// With i_flagmore set, 'q', 'Q' or control-C (3) ends the program through
// seppuku(); otherwise the key code is simply returned.
int mygetch(bool i_flagmore)
{
	int mychar=0;
#if defined(_WIN32)
	mychar=::getch();
#endif
#ifdef unix
	// Temporarily disable canonical mode and echo on stdin
	struct termios saved;
	tcgetattr ( STDIN_FILENO, &saved );
	struct termios unbuffered=saved;
	unbuffered.c_lflag &= ~( ICANON | ECHO );
	tcsetattr ( STDIN_FILENO, TCSANOW, &unbuffered );
	mychar = getchar();
	tcsetattr ( STDIN_FILENO, TCSANOW, &saved );
#endif
	const bool wants_quit=(mychar==113) || (mychar==81) || (mychar==3);  /// q, Q, control-C
	if (i_flagmore && wants_quit)
	{
#ifdef unix
		myprintf("\n\n");
#endif
		seppuku();
	}
	return mychar;
}
/*
void clear_from_cursor_to_end() 
{
    CONSOLE_SCREEN_BUFFER_INFO csbi;
    HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
    DWORD charsWritten;
    DWORD consoleSize;

    // Ottieni le informazioni sul buffer della console
    if (GetConsoleScreenBufferInfo(hConsole, &csbi)) {
        // Calcola il numero di caratteri dalla posizione corrente del cursore alla fine della riga
        consoleSize = csbi.dwSize.X - csbi.dwCursorPosition.X;

        // Riempie lo spazio con spazi vuoti
        FillConsoleOutputCharacter(hConsole, ' ', consoleSize, csbi.dwCursorPosition, &charsWritten);
        
        // Riposiziona il cursore alla riga successiva
        COORD newCursorPos = {0, csbi.dwCursorPosition.Y + 1};
        SetConsoleCursorPosition(hConsole, newCursorPos);
    } 
	else {
        printf("Errore nel recupero delle informazioni della console.\n");
    }
}
*/

// Print a horizontal bar made of i_carattere, 4 columns narrower than the
// terminal (a reported width below 10 is replaced by 100), followed by a
// newline when i_printbarraenne is true. Prints nothing when flagpakka is set.
void printbar(char i_carattere,bool i_printbarraenne=true)
{
	if (flagpakka)
		return;
	int columns=terminalwidth();
	if (columns<10)
		columns=100;
	int remaining=columns-4;
	while (remaining-->0)
		myprintf("%c",i_carattere);
	if (i_printbarraenne)
		myprintf("\n");
}
// Helper of moreprint(): end the current console line and count it in
// io_printed. Once more than i_height-2 lines were shown, display the
// "More" prompt, wait for a key (q/Q/control-C quits inside mygetch),
// scroll by i_height newlines and restart the count.
static void moreprint_endline(int& io_printed,int i_height)
{
	printf("\n");
	io_printed++;
	if (io_printed>(i_height-2))
	{
		printf("-- More (q, Q or control C to exit) --\r");
		mygetch(true);
		for (int i=0;i<i_height;i++)
			printf("\n");
		io_printed=0;
	}
}
// Print i_stringa through a "more"-like pager.
// - The text is always copied to g_output_handle when that is set.
// - Nothing reaches the console when flagsilent is set.
// - With flagnomore, or when terminal width-2 or height is negative, the
//   text is printed as is.
// - Otherwise it is cut into chunks of (terminal width-2) characters, each
//   ended by a newline that counts towards the page.
// i_nocr suppresses the trailing newline and, with it, the paging.
void moreprint(const char* i_stringa,bool i_nocr=false)
{
	if (!i_stringa) return;  // Early return if string is null
	if (g_output_handle)
	{
		if (i_nocr)
			fprintf(g_output_handle,"%s",i_stringa);
		else
			fprintf(g_output_handle,"%s\n",i_stringa);
	}
	if (flagsilent)
		return;
	const int usable_width=terminalwidth()-2;
	const int page_height=terminalheight();
	static int lines_on_page=0;
	// No paging requested, or no usable terminal geometry: plain output
	if (flagnomore || (usable_width<0) || (page_height<0))
	{
		if (i_nocr)
			printf("%s",i_stringa);
		else
			printf("%s\n",i_stringa);
		return;
	}
	// A lone newline is just one (counted) empty line
	if (!i_nocr && (strcmp(i_stringa,"\n")==0))
	{
		moreprint_endline(lines_on_page,page_height);
		return;
	}
	const int length=strlen(i_stringa);
	if (usable_width==0)
		return;
	const int rows=(length/usable_width)+1;
	for (int row=0;row<rows;row++)
	{
		const int start=row*usable_width;
		// Every row is full width except the last, which takes the remainder
		const int count=(row==rows-1) ? length-start : usable_width;
		printf("%.*s",count,i_stringa+start);
		if (!i_nocr)
			moreprint_endline(lines_on_page,page_height);
	}
}
// Send a horizontal bar of i_carattere through moreprint(). The bar is 4
// columns narrower than the terminal; a reported width below 10 or above
// 100 is replaced by 100.
void morebar(const char i_carattere)
{
	int columns=terminalwidth();
	if ((columns<10) || (columns>100))
		columns=100;
	const std::string bar(columns-4,i_carattere);
	moreprint(bar.c_str());
}
// Ask the user to type i_captcha before a dangerous command goes ahead.
// i_reason is shown as the explanation.
// Returns true when flagnocaptcha is set or the typed word equals i_captcha;
// false when i_captcha or i_reason is empty, when nothing can be read from
// stdin (EOF/error), or when the typed word differs.
bool getcaptcha(const string& i_captcha,const string& i_reason)
{
	if (flagnocaptcha)
		return true;
	if (i_captcha=="")
		return false;
	if (i_reason=="")
		return false;
	printf("\nTo confirm a dangerous command\n");
	printf(">>> %s\n",i_reason.c_str());
	printf("enter EXACTLY the capcha, then press CR (return)\n");
	printf("Entering anything else will quit.\n");
	printf("\nCaptcha to continue:     %s\n",i_captcha.c_str());
	char myline[81]={0};
	// A failed read must never be compared: the buffer would be meaningless
	if (scanf("%80s", myline)!=1)
	{
		printf("Wrong captcha\n");
		return false;
	}
	if (myline!=i_captcha)
	{
		printf("Wrong captcha\n");
		return false;
	}
	myprintf("00034: Captcha OK\n");
	return true;
}

// Registry of the program's boolean command-line flags.
// Each flag name maps to a pointer to the bool that stores its value
// (mappaflags), plus an optional help text (helpflags) and an optional help
// scope (helpflagsscope). Looking up an unknown name is treated as a
// programming error and ends in seppuku().
class franz_flags
{
	public:
	MAPPAFLAGS 	mappaflags;		// flag name -> pointer to its bool
	HELPFLAGS 	helpflags;		// flag name -> help text
	HELPFLAGS 	helpflagsscope;	// flag name -> help scope

	// Current value of flag i_name.
	bool get(const string& i_name)
	{
		MAPPAFLAGS::iterator p=mappaflags.find(i_name);
		if (p==mappaflags.end())
		{
			printf("42098: guru doing getflag %s \n",i_name.c_str());
			seppuku();
		}
		if (p->second==NULL)
		{
			printf("42913: guru empty pointer flag%s\n",i_name.c_str());
			seppuku();
		}
		return (*p->second);
	}
	// True when a flag called i_name has been registered.
	bool exists(const string& i_name)
	{
		return (mappaflags.find(i_name)!=mappaflags.end());
	}
	// Store i_value into flag i_name.
	void set(string i_name, bool i_value)
	{
		MAPPAFLAGS::iterator p=mappaflags.find(i_name);
		if (p==mappaflags.end())
		{
			printf("42923: GURU doing setflag %s \n",i_name.c_str());
			seppuku();
		}
		if (p->second==NULL)
		{
			printf("42928: guru empty pointer flag%s\n",i_name.c_str());
			seppuku();
		}

		(*p->second)=i_value;
	}
	// Shortcut for set(i_name,true).
	void settrue(const string& i_name)
	{
		set(i_name,true);
	}
	// Dump every flag with its value and, when one exists, its help text.
	void debugga()
	{
///		printf("48149: array franz flag size %s\n",migliaia(mappaflags.size()));

		for (MAPPAFLAGS::iterator p=mappaflags.begin(); p!=mappaflags.end(); ++p)
		{
			myprintf("48150: %-20s   %d ",p->first.c_str(),(int)*p->second);
			HELPFLAGS::iterator a=helpflags.find(p->first);
			// Only dereference the help entry when the lookup succeeded
			if (a!=helpflags.end())
				myprintf(" <<%s>>",a->second.c_str());
			myprintf("\n");
		}
	}
	// Print, through the pager, every flag that has a help text.
	void tutti()
	{
		for (MAPPAFLAGS::iterator p=mappaflags.begin(); p!=mappaflags.end(); ++p)
		{
			char buffer[200];
			HELPFLAGS::iterator a=helpflags.find(p->first);
			// Flags without a help entry are skipped
			if (a!=helpflags.end())
			{
				color_green();
				snprintf(buffer,sizeof(buffer),"%-20s",p->first.c_str());
				moreprint(buffer,true);
				color_restore();
				snprintf(buffer,sizeof(buffer)," %s",a->second.c_str());
				moreprint(buffer);
			}
		}
	}

	// Space-separated list of the flags that are currently true.
	string compact()
	{

		string risultato="";
		for (MAPPAFLAGS::iterator p=mappaflags.begin(); p!=mappaflags.end(); ++p)
			if (*p->second)
				risultato+=p->first+' ';
		return risultato;
	}

	// Register flag i_name, backed by *i_thebool, and give it i_default.
	// i_help and i_helpscope are stored when not empty. Registering a name
	// that already exists changes nothing.
	void add(bool* i_thebool,string i_name,string i_help,string i_helpscope,bool i_default=false)
	{
		if (i_name=="")
		{
			printf("48125: GURU i_name empty\n");
			seppuku();
		}
		if (i_thebool==NULL)
		{
			printf("09778: GURU thebool NULL\n");
			seppuku();
		}

		MAPPAFLAGS::iterator p=mappaflags.find(i_name);
		if (p==mappaflags.end())
		{
			*(i_thebool)=i_default;
			mappaflags.insert(std::pair<string, bool*>(i_name, i_thebool));
			if (i_help!="")
				helpflags.insert(std::pair<string, string>(i_name, i_help));
			// The scope belongs in its own map, not in helpflags
			if (i_helpscope!="")
				helpflagsscope.insert(std::pair<string, string>(i_name, i_helpscope));
		}
	}

};

franz_flags	g_programflags;


// Case-insensitive strstr(): pointer to the first occurrence of str2 inside
// str1, comparing characters through tolower(); 0 when there is none.
// An empty str2 matches at the very start of str1.
// Both arguments must be valid NUL-terminated strings.
char* stristr(const char* str1,const char* str2)
{
    if (*str2==0)
        return (char*)str1;
    for (const char* start=str1; *start!=0; ++start)
    {
        // Try to match the whole of str2 beginning at this position
        const char* h=start;
        const char* n=str2;
        while ((*h!=0) && (*n!=0) && (tolower((unsigned char)*h)==tolower((unsigned char)*n)))
        {
            ++h;
            ++n;
        }
        if (*n==0)
            return (char*)start;
    }
    return 0;
}
// True when i_filename is not empty and its last character is a double quote.
bool havedoublequote(const string i_filename)
{
	return !i_filename.empty() && (i_filename[i_filename.size()-1]=='"');
}
// Return a copy of i_string without its last character ("" stays "").
// Stand-in for std::string::pop_back(), returning a copy instead of
// modifying the argument.
string mypopback(const string& i_string)
{
	if (i_string=="")
		return "";
	// Keep everything except the final character
	return i_string.substr(0,i_string.size() - 1);
}
// Return i_string without a trailing double quote; strings that do not end
// with one (and the empty string) are returned unchanged.
string cutdoublequote(const string& i_string)
{
	if (i_string=="")
		return "";
	if (havedoublequote(i_string))
		return i_string.substr(0,i_string.size() - 1);	// drop only the closing quote
	return i_string;
}
// True when i_filename ends with i_ext (case-insensitive) and isdirectory()
// does not accept it as a directory.
// Null pointers give false. Only the tail of the name is compared, so an
// earlier occurrence of the extension (e.g. "a.zpaq.zpaq") no longer hides
// the real one. An empty i_ext matches only an empty file name.
bool isextension(const char* i_filename,const char* i_ext)
{
	if (!i_filename)
		return false;
	if (!i_ext)
		return false;
	if (isdirectory(i_filename))
		return false;
	const size_t name_len=strlen(i_filename);
	const size_t ext_len=strlen(i_ext);
	if (ext_len==0)
		return name_len==0;
	if (ext_len>name_len)
		return false;
	// The tail is exactly as long as the extension: any hit is at its start
	const char* tail=i_filename+(name_len-ext_len);
	return stristr(tail,i_ext)==tail;
}
// True when isextension() accepts i_filename with the extension ".zpaq".
bool iszpaq(const string i_filename)
{
	const char* name=i_filename.c_str();
	return isextension(name, ".zpaq");
}
#ifdef _WIN32
// True when isextension() accepts i_filename with the extension ".exe".
bool isexe(const string i_filename)
{
	const char* name=i_filename.c_str();
	return isextension(name, ".exe");
}
#endif
// True when isextension() accepts i_filename with one of the extensions
// ".xls", ".ppt" or ".pps" (checked in that order).
bool isxls(const string i_filename)
{
	const char* name=i_filename.c_str();
	if (isextension(name, ".xls"))
		return true;
	if (isextension(name, ".ppt"))
		return true;
	return isextension(name, ".pps");
}
// True when i_filename contains the text ":$DATA" (case-sensitive).
bool isads(const string i_filename)
{
	if (i_filename.empty())
		return false;
	const char* hit=strstr(i_filename.c_str(), ":$DATA");
	return hit!=0;
}
// True when i_filename contains the text ".zfs" anywhere (case-sensitive).
bool iszfs(const string i_filename)
{
	if (i_filename.empty())
		return false;
	const char* hit=strstr(i_filename.c_str(), ".zfs");
	return hit!=0;
}
// Replace the first occurrence of from inside str with to.
// Returns true when an occurrence was found (and replaced), false otherwise.
bool replace(std::string& str, const std::string& from, const std::string& to) {
    const size_t hit = str.find(from);
    const bool found = (hit != std::string::npos);
    if (found)
        str.replace(hit, from.length(), to);
    return found;
}
/// Replace every occurrence of 'from' inside 'str' with 'to', in place.
/// An empty 'from' is a no-op. The scan resumes after the inserted text, so a
/// 'to' that itself contains 'from' (x -> yx) does not loop forever.
void myreplaceall(std::string& str, const std::string& from, const std::string& to) {
    if(from.empty())
        return;
    size_t at = str.find(from, 0);
    while (at != std::string::npos) {
        str.replace(at, from.length(), to);
        at = str.find(from, at + to.length());
    }
}
/// Return a lowercase copy of i_stringa (per-byte tolower()).
/// Each byte is passed to tolower() as unsigned char: handing it a negative
/// char (any byte >= 0x80 where char is signed) is undefined behaviour.
string stringtolower(string i_stringa)
{
	for (size_t i=0;i<i_stringa.size();i++)
		i_stringa[i]=(char)tolower((unsigned char)i_stringa[i]);
	return i_stringa;
}


/// Expand date/time placeholders inside i_formato and return the result.
/// Placeholders are accepted with a '%' or a '$' prefix:
/// hour min sec weekday year month day week timestamp datetime date time.
/// date is YYYY-MM-DD, time is HH-MM-SS, datetime/timestamp is date_time.
/// When t is NULL the current local time is used.
/// On Windows $pcname, $computername and $username are expanded too (lowercase).
string format_datetime(string i_formato,tm* t=NULL)
{
	if (t==NULL)
	{
		time_t adesso=time(NULL);
		t=localtime(&adesso);
	}
	char	buf[12];
	snprintf(buf,sizeof(buf),"%02d",t->tm_hour);
	const string hour(buf);
	snprintf(buf,sizeof(buf),"%02d",t->tm_min);
	const string min(buf);
	snprintf(buf,sizeof(buf),"%02d",t->tm_sec);
	const string sec(buf);
	snprintf(buf,sizeof(buf),"%d",t->tm_wday);
	const string weekday(buf);
	snprintf(buf,sizeof(buf),"%04d",t->tm_year+1900);
	const string year(buf);
	snprintf(buf,sizeof(buf),"%02d",t->tm_mon+1);
	const string month(buf);
	snprintf(buf,sizeof(buf),"%02d",t->tm_mday);
	const string day(buf);
	snprintf(buf,sizeof(buf),"%02d",(t->tm_yday-t->tm_wday+7)/7);
	const string week(buf);

	const string date=year+'-'+month+'-'+day;
	const string orario=hour+'-'+min+'-'+sec;
	const string datetime=date+'_'+orario;

	/// Order matters: longer names sharing a prefix (weekday/week, datetime/date,
	/// timestamp/time) must be expanded before the shorter ones.
	static const char* const nomi[]={"hour","min","sec","weekday","year","month","day","week","timestamp","datetime","date","time"};
	const string* const valori[]={&hour,&min,&sec,&weekday,&year,&month,&day,&week,&datetime,&datetime,&date,&orario};
	static const char* const prefissi[]={"%","$"};
	const unsigned int quanti=sizeof(nomi)/sizeof(nomi[0]);
	for (unsigned int p=0;p<2;p++)
		for (unsigned int k=0;k<quanti;k++)
			myreplaceall(i_formato,string(prefissi[p])+nomi[k],*valori[k]);

#ifdef _WIN32	
	const string pcname=stringtolower(win_getcomputername());
	myreplaceall(i_formato,"$pcname",pcname);
	myreplaceall(i_formato,"$computername",pcname);
	const string username=stringtolower(win_getusername());
	myreplaceall(i_formato,"$username",username);
#endif

	return i_formato;
}
/// Replace the first occurrence of i_from inside i_str with i_to.
/// Returns false (and leaves i_str untouched) when i_from is not found.
bool myreplace(string& i_str, const string& i_from, const string& i_to)
{
    const size_t dove = i_str.find(i_from);
    if (dove != std::string::npos)
    {
        i_str.replace(dove, i_from.length(), i_to);
        return true;
    }
    return false;
}
/// Last 'length' characters of source; the whole string when it is not longer than that.
std::string myright(std::string const& source, size_t const length)
{
  const size_t total = source.size();
  if (length < total)
    return source.substr(total - length);
  return source;
}
/// First 'length' characters of source; the whole string when it is not longer than that.
std::string myleft(std::string const& source, size_t const length)
{
  const size_t total = source.size();
  if (length < total)
    return source.substr(0, length);
  return source;
}
/// Strip leading and trailing blanks (' ' only, no tabs/newlines).
/// A string made only of blanks (or empty) is returned unchanged.
string mytrim(const string& i_str)
{
	const size_t primo=i_str.find_first_not_of(' ');
	if (primo==string::npos)
		return i_str;
	const size_t ultimo=i_str.find_last_not_of(' ');
	const size_t quanti=ultimo-primo+1;
	return i_str.substr(primo,quanti);
}
/// Split i_string on i_delimiter and append the pieces to array.
/// Empty pieces between two delimiters are kept; a single trailing delimiter
/// does not produce a final empty piece; an empty input appends nothing.
void explode(string i_string,char i_delimiter,vector<string>& array)
{
	const size_t totale=i_string.size();
	size_t inizio=0;
	while (inizio<totale)
	{
		const size_t fine=i_string.find(i_delimiter,inizio);
		if (fine==string::npos)
		{
			/// last piece: everything up to the end
			array.push_back(i_string.substr(inizio));
			break;
		}
		array.push_back(i_string.substr(inizio,fine-inizio));
		inizio=fine+1;
	}
}
/// Per-file record: name, size, attributes/date and hash bookkeeping.
/// A default-constructed record has empty strings, zeroed numbers, all flags
/// false and data set to -1.
struct s_fileandsize
{
	string	filename;
	uint64_t size;
	int64_t attr;
	int64_t date;
	int64_t data;
	bool 	isdir;
	string 	hashhex;
	string 	hashtype;
	bool 	flaghashstored;
	string	writtenfilename;
	bool	hashok;
	bool	filenotfound;
	s_fileandsize():
		filename(),
		size(0),
		attr(0),
		date(0),
		data(-1),
		isdir(false),
		hashhex(),
		hashtype(),
		flaghashstored(false),
		writtenfilename(),
		hashok(false),
		filenotfound(false)
	{
	}
};

/// Characters treated as whitespace by myltrim/myrtrim/mytrim2.
const std::string WHITESPACE = " \n\r\t\f\v";
/// Remove leading whitespace; an all-whitespace (or empty) string becomes "".
std::string myltrim(const std::string &s)
{
    const size_t first = s.find_first_not_of(WHITESPACE);
    if (first == std::string::npos)
        return std::string();
    return s.substr(first);
}
/// Remove trailing whitespace (the WHITESPACE set); an all-whitespace string becomes "".
std::string myrtrim(const std::string &s)
{
    const size_t last = s.find_last_not_of(WHITESPACE);
    if (last == std::string::npos)
        return std::string();
    return s.substr(0, last + 1);
}
/// Remove whitespace from both ends: left side first, then right side.
std::string mytrim2(const std::string &s)
{
    const std::string senzatesta = myltrim(s);
    return myrtrim(senzatesta);
}
/// Part of i_string after the last '/'; the whole string when there is no '/'.
/// Backslashes are not treated as separators here.
string extractfilename(const string& i_string)
{
	const size_t barra=i_string.rfind('/');
	if (barra==string::npos)
		return i_string;
	return i_string.substr(barra+1);
}
/// Extension of the file name in s, without the dot.
/// Returns "" for directories, for names without a dot, and when the part from
/// the dot onward is longer than 20 characters (e.g. pippo.plutopaperino...
/// is most likely not a real extension).
string prendiestensione(const string& s)
{
	if (isdirectory(s))
		return ("");
	const string nomefile=extractfilename(s);
	const size_t punto=nomefile.rfind('.');
	if (punto==string::npos)
		return("");
	/// length measured from the dot, dot included
	const size_t lunghezzaestensione=nomefile.length()-punto;
	if (lunghezzaestensione>20)
		return("");
	return nomefile.substr(punto+1);
}
/// Directory part of i_string, trailing separator included.
/// The last '/' wins; only when there is no '/' at all is the last '\' used.
/// Returns "" when neither separator is present.
string extractfilepath(const string& i_string)
{
	size_t taglio=i_string.rfind('/');
	if (taglio==string::npos)
		taglio=i_string.rfind('\\');
	if (taglio==string::npos)
		return("");
	return i_string.substr(0,taglio+1);
}
/// File name of s (see extractfilename) with everything from the last '.' removed.
/// Names without a dot are returned as they are.
string prendinomefileebasta(const string& s)
{
	const string nomefile=extractfilename(s);
	const size_t punto=nomefile.rfind('.');
	if (punto==string::npos)
		return nomefile;
	return nomefile.substr(0,punto);
}
/// Leading part of fn up to and including the last '/' or '\'.
/// Returns "" when fn has no separator. The scan stops at the first NUL byte.
string path(const string& fn)
{
	const char* testo=fn.c_str();
	size_t taglio=0;
	for (size_t i=0; testo[i]!=0; ++i)
	{
		const char c=testo[i];
		if ((c=='/') || (c=='\\'))
			taglio=i+1;
	}
	return fn.substr(0,taglio);
}
/// Make a string safe to be used as a file name.
/// - alphanumeric characters and  space - # ~ % ^ _ . + =  are kept
/// - '&' becomes "_and_"
/// - '(' and '{' become '(' ; ')' and '}' become ')'
/// - every other byte (including < > : " / \ | ? * and non-ASCII bytes) becomes '_'
/// With i_keeppath=true the path characters ':' '/' '\' are kept as they are.
/// Bytes are handed to isalnum() as unsigned char: a negative char there is
/// undefined behaviour.
string purgeansi(string i_string,bool i_keeppath=false)
{
	if (i_string=="")
		return ("");
	string purged;
	for (unsigned int i=0;i<i_string.length();i++)
	{
		const char c=i_string[i];
		if (i_keeppath)
			if ((c==':') || (c=='/') || (c=='\\'))
			{
				purged+=c;
				continue;
			}
		if (isalnum((unsigned char)c))
		{
			purged+=c;
			continue;
		}
		switch (c)
		{
			case ' ':
			case '-':
			case '#':
			case '~':
			case '%':
			case '^':
			case '_':
			case '.':
			case '+':
			case '=':
				purged+=c;
				break;
			case '&':
				purged+="_and_";
				break;
			case '(':
			case '{':
				purged+='(';
				break;
			case ')':
			case '}':
				purged+=')';
				break;
			default:
				/// everything else, forbidden characters included
				purged+='_';
				break;
		}
	}
	return purged;
}
/// Currently a pass-through: the input string is returned unchanged.
string forcelatinansi(string i_string)
{
	return i_string;
}
/// Copy of i_string with every i_from replaced by i_to (single pass, see myreplaceall).
/// Returns "" when ANY of the three arguments is empty.
string purgedouble(const string& i_string,const string& i_from,const string& i_to)
{
	if (i_string.empty() || i_from.empty() || i_to.empty())
		return("");
	string purged(i_string);
	myreplaceall(purged,i_from,i_to);
	return purged;
}
/// Shorten the file-name part of a (mail) file path by collapsing repeated
/// blanks/dots/underscores and repeated Fw/Fwd/Re/SV prefixes.
/// The directory part (see extractfilepath) is kept as it is.
/// Every rule is applied ten times in a row, in the order listed below.
string compressemlfilename(const string& i_string)
{
	if (i_string=="")
		return("");
	/// {from,to} pairs; order is significant
	static const char* const regole[][2]=
	{
		{"  "," "},
		{"..","."},
		{"Fw ","Fwd "},
		{"Fwd Fwd ","Fwd "},
		{" R "," Re "},
		{"R Fwd ","Re Fwd"},
		{" RE "," Re "},
		{"Re Re ","Re "},
		{"Fwd Re Fwd Re ","Fwd Re "},
		{"Re Fwd Re Fwd ","Re Fwd "},
		{" SV SV "," SV "},
		{"Fwd FW ","Fwd "},
		{"Fwd I ","Fwd "},
		{"I Fwd ","Fwd "},
		{"R Re ","Re "},
		{"__","_"},
		{" _ ","_"},
		{"  "," "}
	};
	const unsigned int quante=sizeof(regole)/sizeof(regole[0]);

	string uniqfilename=extractfilename(i_string);
	const string percorso=extractfilepath(i_string);
	for (unsigned int r=0;r<quante;r++)
		for (int k=0;k<10;k++)
			uniqfilename=purgedouble(uniqfilename,regole[r][0],regole[r][1]);

	/// strip trailing '-', '.' and ' ' (index 0 is never examined)
	for (int k=uniqfilename.length()-1;k>0;k--)
	{
		const char ultimo=uniqfilename[k];
		if ((ultimo!='-') && (ultimo!='.') && (ultimo!=' '))
			break;
		uniqfilename=mypopback(uniqfilename);
	}
	uniqfilename=mytrim2(uniqfilename);
	return percorso+uniqfilename;
}
/// Open i_filename for binary reading ("rb").
/// On Windows the name is converted with utow() and opened with _wfopen().
/// Returns the FILE* or 0 on failure (a message is printed only in debug mode).
FILE* freadopen(const char* i_filename)
{
	FILE* handle=NULL;
#ifdef _WIN32
	handle=_wfopen(utow(i_filename).c_str(), L"rb" );
#else
	handle=fopen(i_filename, "rb" );
#endif
	if (handle!=NULL)
		return handle;
	if (flagdebug)
	{
		myprintf("\n");
		myprintf("00035! freadopen cannot open: %Z\n",i_filename);
	}
	return 0;
}
/// Size of an already-open file: seek to the end, read the offset, rewind to the start.
/// A null handle gives 0. The file position is left at the beginning.
/// The results of fseeko/ftello are not checked.
int64_t prendidimensionehandle(FILE* i_handle)
{
	if (!i_handle)
		return 0;
	fseeko(i_handle, 0, SEEK_END);
	const int64_t dimensione=ftello(i_handle);
	fseeko(i_handle, 0, SEEK_SET);
	return dimensione;
}
/// Size of the named file, obtained by opening it (freadopen) and seeking to the end.
/// Returns 0 for a NULL name or when the file cannot be opened.
int64_t prendidimensionefile(const char* i_filename)
{
	if (!i_filename)
		return 0;
	FILE* myfile = freadopen(i_filename);
	if (!myfile)
		return 0;
	fseeko(myfile, 0, SEEK_END);
	const int64_t dimensione=ftello(myfile);
	fclose(myfile);
	return dimensione;
}
#ifdef _WIN32
/// True when the name starts with the long UNC prefix //?/UNC/ (UNC in any case).
/// Names shorter than 8 characters never match.
bool islonguncpath(string i_filename)
{
	if (i_filename.size()<8)
		return false;
	return	(i_filename[0]=='/')
		&&	(i_filename[1]=='/')
		&&	(i_filename[2]=='?')
		&&	(i_filename[3]=='/')
		&&	(toupper(i_filename[4])=='U')
		&&	(toupper(i_filename[5])=='N')
		&&	(toupper(i_filename[6])=='C')
		&&	(i_filename[7]=='/');
}
#endif
/// True when the name starts with the long-path prefix followed by a drive: //?/X:/
/// Names shorter than 8 characters never match.
bool islongpath(string i_filename)
{
	if (i_filename.size()<8)
		return false;
	return	(i_filename[0]=='/')
		&&	(i_filename[1]=='/')
		&&	(i_filename[2]=='?')
		&&	(i_filename[3]=='/')
		&&	isalpha(i_filename[4])
		&&	(i_filename[5]==':')
		&&	(i_filename[6]=='/');
}
/// Case-insensitive comparison of two characters.
/// The characters go through toupper() as unsigned char: passing a negative
/// char (bytes >= 0x80 where char is signed) is undefined behaviour.
bool comparechar(char c1, char c2)
{
    if (c1 == c2)
        return true;
    return std::toupper((unsigned char)c1) == std::toupper((unsigned char)c2);
}
/// Return an uppercase copy of i_stringa (per-byte toupper()).
/// Each byte is passed to toupper() as unsigned char: handing it a negative
/// char (any byte >= 0x80 where char is signed) is undefined behaviour.
string stringtoupper(string i_stringa)
{
	for (size_t i=0;i<i_stringa.size();i++)
		i_stringa[i]=(char)toupper((unsigned char)i_stringa[i]);
	return i_stringa;
}

/// Case-insensitive search: index of the first occurrence of i_substring
/// inside i_string, or -1 when absent. Both strings are lowercased first,
/// so this is not a fast path.
int myposi(string i_substring,string i_string)
{
	const string ago=stringtolower(i_substring);
	const string pagliaio=stringtolower(i_string);
	const size_t dove=pagliaio.find(ago);
	if (dove!=std::string::npos)
		return dove;
	return -1;
}
/// Case-insensitive equality of two strings (same length, characters equal per comparechar).
bool stringcomparei(std::string str1, std::string str2)
{
    if (str1.size() != str2.size())
        return false;
    for (size_t i = 0; i < str1.size(); i++)
        if (!comparechar(str1[i], str2[i]))
            return false;
    return true;
}
string timetohuman(int32_t i_seconds,int32_t i_fixedlength=0)
{
	if (i_seconds<=0)
	{
		if (i_fixedlength>=4)
			return "0000:00:00";
		else
		if (i_fixedlength==3)
			return "000:00:00";
		else
			return "00:00:00";
	}
	int h=(i_seconds/3600);
	int m=(i_seconds -(3600*h))/60;
	int s=(i_seconds -(3600*h)-(m*60));
	char	temporaneo[20];
	if ((h<=99) || (i_fixedlength==2))
		snprintf(temporaneo,sizeof(temporaneo),"%02d:%02d:%02d",h,m,s);
	else
	if ((h<=999) || (i_fixedlength==3))
		snprintf(temporaneo,sizeof(temporaneo),"%03d:%02d:%02d",h,m,s);
	else
		snprintf(temporaneo,sizeof(temporaneo),"%d:%02d:%02d",h,m,s);
	return temporaneo;
}

/// Write a human readable size ("12.34 MB") for i_bytes into i_buffer and return i_buffer.
/// Units go from " B" to "PB" in steps of 1024; values up to 1024 stay in bytes.
/// Negative sizes produce "neg". A NULL buffer or a buffer smaller than
/// 5 bytes is reported and handed to seppuku().
char* mytohuman(int64_t i_bytes,char* i_buffer,int i_buffersize)
{
	if (i_buffer==NULL)
	{
		myprintf("00037! guru i_buffer null\n");
		seppuku();
	}
	if (i_buffersize<5)
	{
		myprintf("00038! guru buffer too small\n");
		seppuku();
	}
	if (i_bytes<0)
	{
		snprintf(i_buffer,5,"neg");
		return i_buffer;
	}
	char 	const *unita[] = {" B","KB","MB","GB","TB","PB"};
	const char ultima = sizeof(unita)/sizeof(unita[0]);
	double 	scalato=i_bytes;
	int quale=0;
	if (i_bytes>1024)
		while (((i_bytes/1024)>0) && (quale<ultima-1))
		{
			scalato=i_bytes/1024.0;
			i_bytes/=1024;
			quale++;
		}
	snprintf(i_buffer,i_buffersize,"%.02f %s",scalato,unita[quale]);
	return i_buffer;
}

/// Human readable size of i_bytes (see mytohuman).
/// The result lives in a static buffer owned by this function: the next call
/// to tohuman() overwrites it.
inline char* tohuman(int64_t i_bytes)
{
	static char io_buf[30];
	return mytohuman(i_bytes,io_buf,sizeof(io_buf));
}
/// Same as tohuman() but with its own static buffer, so its result does not
/// get overwritten by a call to tohuman().
inline char* tohuman2(int64_t i_bytes)
{
	static char io_buf[30];
	return mytohuman(i_bytes,io_buf,sizeof(io_buf));
}
/// Same as tohuman() but with its own static buffer (third independent slot).
inline char* tohuman3(int64_t i_bytes)
{
	static char io_buf[30];
	return mytohuman(i_bytes,io_buf,sizeof(io_buf));
}
/// Same as tohuman() but with its own static buffer (fourth independent slot).
inline char* tohuman4(int64_t i_bytes)
{
	static char io_buf[30];
	return mytohuman(i_bytes,io_buf,sizeof(io_buf));
}
#ifdef _WIN32
/// Same as tohuman() but with its own static buffer (fifth independent slot).
inline char* tohuman5(int64_t i_bytes)
{
	static char io_buf[30];
	return mytohuman(i_bytes,io_buf,sizeof(io_buf));
}
#endif
// fix for Mac PowerPC (yes, no strlen here)
/// Length of i_string, looking at no more than maxlen bytes (strnlen replacement).
/// A NULL string or maxlen==0 is treated as a fatal error: seppuku(), then exit(0).
size_t mystrnlen(const char *i_string, size_t maxlen)
{
	if (i_string==NULL)
	{
		if (flagdebug)
			myprintf("00039! GURU null string\n");
		seppuku();  //safer to die
		exit(0);
		return 0;
	}
	if (maxlen==0)
	{
		if (flagdebug)
			myprintf("00040! GURU maxlen 0\n");
		seppuku();  //safer to die
		exit(0);
		return 0;
	}
	size_t len=0;
	while ((len<maxlen) && (i_string[len]!=0))
		len++;
	return len;
}

/// Parse a size such as "1000", "10K", "2MB", "3 gb" into a 64 bit integer.
/// All digits of the input form the number, all letters form the unit:
///   K M G T      -> decimal multipliers (10^3, 10^6, 10^9, 10^12)
///   KB MB GB TB  -> binary multipliers (2^10, 2^20, 2^30, 2^40)
/// Any other unit text is ignored (reported only in debug mode).
/// NULL returns 0. Inputs of 20 or more characters, and results that do not
/// fit in 64 bit, are fatal (seppuku + exit).
/// The overflow test is done BEFORE multiplying: checking the sign afterwards
/// relies on signed overflow (undefined) and misses wraps that land positive.
int64_t myatoll(const char * i_str)
{
	if (i_str==NULL)
		return 0;
	
	if (mystrnlen(i_str,20)==20) /// this should be 21, 9999999...TB
	{
		myprintf("00041! GURU on very long number (longer than 20 chars)\n");
		seppuku();
		exit(0);
	}
	string	stringa=i_str;
	string	thedigit;
	string	thestring;
	for (unsigned int i=0;i<stringa.length();i++)
	{
		/// ctype functions need an unsigned char value
		const unsigned char c=(unsigned char)stringa[i];
		if (isdigit(c))
			thedigit+=stringa[i];
		else
		if (isalpha(c))
			thestring+=stringa[i];
	}
	int64_t	risultato=atoll(thedigit.c_str());
	if (flagdebug3)
	{
		myprintf("00042: integer (string) |%s| %s\n",thedigit.c_str(),migliaia(thedigit.size()));
		myprintf("00043: integer          |%lld|\n",risultato);
		myprintf("00044: text part        |%s|\n",thestring.c_str());
	}
	thestring=stringtoupper(thestring);

	int64_t moltiplicatore=1;
	if (thestring.size()==1)
	{
		switch (thestring[0])
		{
			case 'K': moltiplicatore=1000; break;
			case 'M': moltiplicatore=1000000; break;
			case 'G': moltiplicatore=1000000000; break;
			case 'T': moltiplicatore=1000000000000LL; break;
			default: break;
		}
	}
	else
	if (thestring.size()==2)
	{
		if (thestring=="KB")
			moltiplicatore=1024;
		else
		if (thestring=="MB")
			moltiplicatore=1048576;
		else
		if (thestring=="GB")
			moltiplicatore=1073741824;
		else
		if (thestring=="TB")
			moltiplicatore=1099511627776LL;
	}
	else
	{
		if (flagdebug)
			myprintf("00045! ERROR string size not 1 or 2, ignoring [%s]\n",migliaia(thestring.size()));
	}

	const int64_t massimo=0x7FFFFFFFFFFFFFFFLL;
	if (risultato>massimo/moltiplicatore)
	{
		myprintf("00047! GURU OVERFLOW! %s does not fit in 64 bit\n",i_str);
		seppuku();
		exit(0);
		return 0;
	}
	risultato*=moltiplicatore;
	
	if (flagdebug)
		myprintf("00046: final from %s to %s (%s)\n",i_str,migliaia(risultato),tohuman(risultato));
	
	if (risultato<0)
	{
		myprintf("00047! GURU NEGATIVE! final from %s to %s (%s)\n",i_str,migliaia(risultato),tohuman(risultato));
		seppuku();
		exit(0);
		return 0;
	}
	return risultato;
}
/// no stoi() on old compiler
/// Minimal stoi() replacement for old compilers: the decimal digits found in
/// i_string are accumulated left to right, every other character is skipped.
/// "" (and strings without digits) give 0. No sign handling and no overflow
/// check: meant for short digit strings.
/// Bytes are handed to isdigit() as unsigned char (a negative char is undefined).
int mystoi(string i_string)
{
	if (i_string=="")
		return 0;
    int risultato=0;
    for (unsigned int i=0; i<i_string.size();i++)
		if (isdigit((unsigned char)i_string[i]))
			risultato=risultato*10+i_string[i]-'0';
	return risultato;
}
/// Turn a (possibly partial) date string into the decimal form YYYYMMDDHHMMSS.
/// The string first goes through format_datetime(), then only its digits are
/// used: YYYY, YYYYMM, YYYYMMDD, ... up to 14 digits, always an even count.
/// Missing month/day are filled in according to i_flagfrom:
///   true  (lower bound): first month / first day
///   false (upper bound): last month / last day of the month
/// For an upper bound given as YYYY02 the last day is 29 in leap years.
/// Returns -1 (after printing a message) when the date is not acceptable.
int64_t encodestringdate(string i_date,bool i_flagfrom)
{
	i_date=format_datetime(i_date);
	string purged;
	for (unsigned int i=0;i<i_date.length();i++)
		if (isdigit((unsigned char)i_date[i]))
			purged+=i_date[i];
	int lunghezza=purged.length();
	if ((lunghezza>14))
	{
		myprintf("00048! datelength >14 (%d) |%s|\n",lunghezza,purged.c_str());
		return -1;
	}
	if (lunghezza%2!=0)
	{
		myprintf("00049! datelength must be even (use leading zeros) (%d) |%s|\n",lunghezza,purged.c_str());
		return -1;
	}

	int year	=0;
	int month	=0;
	int day		=0;
	int hour	=0;
	int minute	=0;
	int second	=0;
	if (lunghezza>=4)
		year	=mystoi(purged.substr(0,4));
	if (lunghezza>=6)
		month	=mystoi(purged.substr(4,2));
	if (lunghezza>=8)
		day		=mystoi(purged.substr(6,2));
	if (lunghezza>=10)
		hour	=mystoi(purged.substr(8,2));
	if (lunghezza>=12)
		minute	=mystoi(purged.substr(10,2));
	if (lunghezza>=14)
		second	=mystoi(purged.substr(12,2));

	const bool isleap= (((year % 4 == 0) &&
         (year % 100 != 0)) ||
         (year % 400 == 0));

	if (i_flagfrom)
	{
		if (lunghezza==4) /// 2022
		{
			month	=1;
			day		=1;
		}
		if (lunghezza==6) /// 202209
			day		=1;
	}
	else
	{
		if (lunghezza==4) /// 2022
		{
			month	=12;
			day		=31;
		}
		if (lunghezza==6) /// 202209
		{
			if (month==2)
				day=isleap ? 29 : 28;	/// do not lose Feb 29
			else
			if ((month==4) || (month==6) || (month==9) || (month==11))
				day	=30;
			else
				day=31;
		}
	}
	if (flagdebug2)
		myprintf("00050: date   %04d-%02d-%02d %02d:%02d:%02d\n",year,month,day,hour,minute,second);
	if ((year<1970) || (year>2070))
	{
		myprintf("00051! year not from 1970 to 2070\n");
		return -1;
	}
	if ((month<1) || (month>12))
	{
		myprintf("00052! month not in [01..12]. Use leading zero (not 3 but 03)\n");
		return -1;
	}
	if ((day<1) || (day>31))
	{
		myprintf("00053! day not in [01..31]. Use leading zero (not 4 but 04)\n");
		return -1;
	}
	if (hour>24)
	{
		myprintf("00054! hour >24\n");
		return -1;
	}
	if (minute>60)
	{
		myprintf("00055! minute >60\n");
		return -1;
	}
	if (second>60)
	{
		myprintf("00056! second >60\n");
		return -1;
	}

    if (month == 2)
    {
        if (isleap)
		{
			if (!(day <=29))
			{
				myprintf("00057! leap year, feb must be <=29\n");
				return -1;
			}
		}
        else
			if (!(day <=28))
			{
				myprintf("00058! NO leap year, feb must be <=28\n");
				return -1;
			}
    }
    if ((month==4) || (month==6) || (month==9) || (month==11))
		if (!(day <= 30))
		{
			myprintf("00059! this month (%d) cannot have more than 30 days\n",month);
			return -1;
		}

	return 	year	*10000000000LL
		+	month	*100000000LL
		+	day		*1000000
		+	hour	*10000
		+	minute	*100
		+	second;
}


// Convert non-negative decimal number x to string of at least n digits
// Decimal text of the non-negative number x, left-padded with zeros to at
// least n digits (n==0 and x==0 gives an empty string).
string itos(int64_t x, int n=1) {
  assert(x>=0);
  assert(n>=0);
  string cifre;
  while (x!=0 || n>0) {
    // prepend the lowest digit
    cifre.insert(cifre.begin(), char('0'+x%10));
    x/=10;
    --n;
  }
  return cifre;
}
// Replace * and ? in fn with part or digits of part
// Build a multipart file name: scanning fn from right to left, each '?' takes
// the next (lowest) decimal digit of part, and a '*' is replaced by whatever
// is left of part (after which part becomes 0).
string subpart(string fn, int part) {
  for (int j=fn.size()-1; j>=0; --j) {
    const char c=fn[j];
    if (c=='?') {
      fn[j]='0'+part%10;
      part/=10;
    }
    else if (c=='*') {
      fn.replace(j, 1, itos(part));
      part=0;
    }
  }
  return fn;
}
// Convert a "YYYY-MM-DD HH:MM:SS" string from UTC to local time.
/// Slow, working on string instead of char *. But who cares?
/// The input is returned unchanged when it is not exactly 19 characters long,
/// when the conversion fails, on Windows XP/2003 builds, and on 32 bit
/// Windows builds (where no conversion is attempted at all).
/// Size of the output buffer (larger than the 19 characters needed, to keep
/// the compiler from warning about possible snprintf truncation).
#define NO_WARNING_PLEASE 36
string ConvertUtcToLocalTime(const string& i_date)
{
#if defined(_WIN32_WINNT) && ((_WIN32_WINNT == 0x0501) || (_WIN32_WINNT == 0x0502))
	/// XP / Server 2003 target: no conversion
	return i_date;
#endif
	
	if (flagdebug3)
	{
		myprintf("\n");
		myprintf("00060: converting to localtime %s\n",i_date.c_str());
	}
	
	if (i_date.length()!=19)
	{
		myprintf("25854$ i_date is not 19 chars long |%s|\n",i_date.c_str());
		return i_date;
	}

	/// fixed-position parsing: 0123-56-89 12:45:78
	struct tm t;
	memset(&t,0,sizeof(t));
	t.tm_year 	= atoi(i_date.c_str())-1900;
	t.tm_mon 	= atoi(i_date.c_str()+5)-1;
	t.tm_mday 	= atoi(i_date.c_str()+8);
	t.tm_hour 	= atoi(i_date.c_str()+11);
	t.tm_min 	= atoi(i_date.c_str()+14);
	t.tm_sec 	= atoi(i_date.c_str()+17);


#ifdef _WIN32
//
#ifdef _WIN64
	time_t tt = _mkgmtime64(&t);
#else

/// 32 bit Windows: tt stays -1, so the input is returned as it is
time_t tt =-1;
///	time_t tt = _mkgmtime32(&t);
#endif
	if (tt==-1)
		return i_date;
	struct tm* t2=NULL;
	t2 = &t;
	/// NOTE(review): the result of localtime() is dereferenced without a NULL check
	*t2 = *localtime(&tt);
#else
	/// not Windows
	time_t utcTime = timegm(&t); // interpret the broken-down time as UTC
	const struct tm* t2 = localtime(&utcTime);
	if (t2==NULL)
	{
		myprintf("25871$ ERROR in localtime!\n");
		return i_date;
	}
#endif
	char ds[NO_WARNING_PLEASE];
	memset(ds,0,NO_WARNING_PLEASE);
	
	snprintf(ds,NO_WARNING_PLEASE, "%04d-%02d-%02d %02d:%02d:%02d",
         t2->tm_year + 1900,
         t2->tm_mon + 1,
         t2->tm_mday,
         t2->tm_hour,
         t2->tm_min,
         t2->tm_sec);
		 
	///snprintf(ds,sizeof(ds),"%.4d-%.2d-%.2d %.2d:%.2d:%.2d", t2->tm_year + 1900,t2->tm_mon + 1, t2->tm_mday, t2->tm_hour, t2->tm_min, t2->tm_sec);
	if (flagdebug)
		myprintf("00061: localtime is %s\n",ds);
	return ds;
}
// Convert 64 bit decimal YYYYMMDDHHMMSS to "YYYY-MM-DD HH:MM:SS"
// where -1 = unknown date, 0 = deleted.
/// Render a decimal date (YYYYMMDDHHMMSS) as "YYYY-MM-DD HH:MM:SS".
/// date<=0 gives 19 blanks. Unless i_flagutc is set the text is converted to
/// local time (ConvertUtcToLocalTime). With i_mylocal the result is
/// rearranged as "DD/MM/YYYY  HH:MM" (two blanks, no seconds).
string dateToString(bool i_flagutc,int64_t date,bool i_mylocal=false)
{
  if (date<=0) return "                   ";
  string testo="0000-00-00 00:00:00";
  /// positions of the 14 digits inside testo, last digit first
  static const int posizioni[]={18,17,15,14,12,11,9,8,6,5,3,2,1,0};
  for (int k=0; k<14; ++k)
  {
    testo[posizioni[k]]+=int(date%10);
    date/=10;
  }
  if (!i_flagutc)
		testo=ConvertUtcToLocalTime(testo);
  if (!i_mylocal)
    return testo;

  /// for every output position: index to copy from testo, -1 = keep the template char
  static const int origine[17]={8,9,-1,5,6,-1,0,1,2,3,-1,-1,11,12,13,14,15};
  char vista[30]="../../....  .....";
  for (int k=0; k<17; ++k)
    if (origine[k]>=0)
      vista[k]=testo.at(origine[k]);
  testo=vista;
  return testo;
}
/// Current local time as a decimal YYYYMMDDHHMMSS number; 0 when localtime() fails.
int64_t now()
{
	time_t 	adesso=time(NULL);
	const tm* 	t=localtime(&adesso);
	if (t==NULL)
		return 0;
	const int64_t anno	=t->tm_year+1900;
	const int64_t mese	=t->tm_mon+1;
	int64_t risultato=anno*10000000000LL;
	risultato+=mese*100000000LL;
	risultato+=t->tm_mday*1000000;
	risultato+=t->tm_hour*10000;
	risultato+=t->tm_min*100;
	risultato+=t->tm_sec;
	return risultato;
}

// Short text for a stored attribute word. The low byte selects the flavour:
//   'u' : one type character (looked up from bits 20..23) followed by four
//         octal digits taken from bits 8..19
//   'w' : one letter for every set bit among bits 8..39; the first five fill
//         the 5-char field, further ones are appended
// Anything else gives five blanks.
string attrToString(int64_t attrib) {
  string r="     ";
  const int64_t flavour=attrib&255;
  if (flavour=='u') {
    static const char tipi[]="0pc3d5b7 9lBsDEF";
    r[0]=tipi[(attrib>>20)&15];
    for (int cifra=0; cifra<4; ++cifra)
      r[4-cifra]=(attrib>>(8+3*cifra))%8+'0';
    return r;
  }
  if (flavour=='w') {
    static const char lettere[]="RHS DAdFTprCoIEivs89012345678901";
    int scritti=0;
    for (int bit=0; bit<32; ++bit) {
      if (!((attrib>>(bit+8))&1))
        continue;
      const char c=lettere[bit];
      if (scritti<5)
        r[scritti]=c;
      else
        r+=c;
      ++scritti;
    }
  }
  return r;
}
// Convert seconds since 0000 1/1/1970 to 64 bit decimal YYYYMMDDHHMMSS
// Valid from 1970 to 2099.
// The calendar is computed by hand (no gmtime): days are grouped in 4-year
// terms of 1461 days starting at 1970, then padded so that every year has
// 366 slots and every month 31 slots, which makes month/day a plain
// division. A value of -1 is treated as 0; a 4-byte time_t is read as unsigned.
int64_t decimal_time(time_t tt) {
  if (tt==-1) tt=0;
  int64_t t=(sizeof(tt)==4) ? unsigned(tt) : tt;
  const int second=t%60;
  const int minute=t/60%60;
  const int hour=t/3600%24;
  t/=86400;  // days since Jan 1 1970
  const int term=t/1461;  // 4 year terms since 1970
  t%=1461;
  // within a term the years are 1970-like (non leap), 1971-like (non leap),
  // 1972-like (leap), 1973-like (non leap): pad the three non leap ones
  t+=(t>=59);  // insert Feb 29 on non leap years
  t+=(t>=425);
  t+=(t>=1157);
  const int year=term*4+t/366+1970;  // actual year
  t%=366;
  t+=(t>=60)*2;  // make Feb. 31 days
  t+=(t>=123);   // insert Apr 31
  t+=(t>=185);   // insert June 31
  t+=(t>=278);   // insert Sept 31
  t+=(t>=340);   // insert Nov 31
  const int month=t/31+1;
  const int day=t%31+1;
  return year*10000000000LL+month*100000000+day*1000000
         +hour*10000+minute*100+second;
}
// Convert decimal date to time_t - inverse of decimal_time()
// Convert a decimal date (YYYYMMDDHHMMSS) to seconds since 1970-01-01.
// date<=0 returns -1; a month field of 00 returns 0.
// Leap years are taken as year%4==0, consistent with the 1970..2099 range
// stated for decimal_time().
// The day count is widened to 64 bit before being multiplied by 86400: done
// in int the product overflows for every date after 2038-01-19.
time_t unix_time(int64_t date) {
  if (date<=0) return -1;
  static const int days[12]={0,31,59,90,120,151,181,212,243,273,304,334};
  const int year=date/10000000000LL%10000;
  const int month=(date/100000000%100-1)%12;
  const int day=date/1000000%100;
  const int hour=date/10000%100;
  const int min=date/100%100;
  const int sec=date%100;
  if (month<0)
	  return 0;
  const int64_t daycount=day-1+days[month]+(year%4==0 && month>1)+((year-1970)*1461+1)/4;
  return (time_t)(daycount*86400+hour*3600+min*60+sec);
}
/*
	section: errors
*/
#ifdef BSD
/// Convert num blocks of fsbs bytes into blocks of bs bytes: num*fsbs/bs
/// (multiplication first, then integer division).
int64_t fsbtoblk(int64_t num, uint64_t fsbs, u_long bs)
{
	const intmax_t totale=num * (intmax_t) fsbs;
	return (totale / (int64_t) bs);
}
#endif

#ifdef _WIN32
/// True for names longer than 3 characters that start with a drive letter and a colon (X:...).
bool isdospath(const string& i_filename)
{
	if (i_filename.size()<=3)
		return false;
	return isalpha(i_filename[0]) && (i_filename[1]==':');
}
#endif


/// True for names longer than 3 characters that start with drive letter,
/// colon and a slash or backslash (X:/... or X:\...).
bool iswindowspath(const string& i_filename)
{
	if (i_filename.size()<=3)
		return false;
	if (!isalpha(i_filename[0]))
		return false;
	if (i_filename[1]!=':')
		return false;
	const char separatore=i_filename[2];
	return (separatore=='\\') || (separatore=='/');
}
/// Heuristic test for a UNC-like name (forward slashes).
/// - shorter than 3 characters, or not starting with '/': false
/// - third character '?' (long-path form): false when a ':' sits at index 5
///   (drive letter form), true otherwise
/// - otherwise: true when another '/' appears at index 3 or later
/// Only the first character is checked for '/', the second one is not.
bool iswindowsunc(const string& i_filename)
{
	if (i_filename.size()<3)
			return false;
	if (i_filename[0]!='/')
			return false;
	if (i_filename[2]=='?') // longpath
	{
		const bool condrive=(i_filename.size()>5) && (i_filename[5]==':');
		return !condrive;
	}
	for (unsigned int i=3;i<i_filename.size();i++)
		if (i_filename[i]=='/')
			return true;
	return false;
}
#ifdef _WIN32
/// From //server/share/dir/file return //server/share.
/// The server part ends at the first '/' found from index 3 on; the share is
/// what follows up to the next '/'. When the share is not followed by a '/'
/// only "//server/" comes back. Names not starting with "//" give "".
string getfirstwindowsuncdir(const string& i_filename)
{
	if (i_filename=="")
			return "";
	if ((i_filename[0]!='/') || (i_filename[1]!='/'))
			return "";
	string	theserver="";
	string 	resto="";
	const size_t fineserver=i_filename.find('/',3);
	if (fineserver!=string::npos)
	{
		theserver=i_filename.substr(0,fineserver+1);
		resto=i_filename.substr(fineserver+1);
	}
	string 	firstshare="";
	const size_t fineshare=resto.find('/');
	if (fineshare!=string::npos)
		firstshare=resto.substr(0,fineshare);
	return theserver+firstshare;
}
#endif

/// True when i_directory exists.
/// unix builds: stat() succeeds and reports a directory.
/// Windows builds: FindFirstFile on "<dir>/*.*" returns a valid handle
/// (a trailing '/' is appended first when isdirectory() says it is missing).
/// Builds where neither macro is defined always get false.
bool direxists(string i_directory)
{
#ifdef unix
	struct stat sb;
    return ((stat(i_directory.c_str(), &sb) == 0) && S_ISDIR(sb.st_mode));
#endif
#ifdef _WIN32
	HANDLE	myhandle;
	WIN32_FIND_DATA findfiledata;
	if (!isdirectory(i_directory))
		i_directory+="/";
	std::string pattern=i_directory+"*.*";
	///printf("panno %s\n",pattern.c_str());
	std::wstring wpattern=utow(pattern.c_str());
	myhandle=FindFirstFile(wpattern.c_str(),&findfiledata);
	if (myhandle!=INVALID_HANDLE_VALUE)
	{
		FindClose(myhandle);
		return true;
	}
	return false;
#endif
	/// reached only when neither branch above was compiled in
	return false;
}
#ifdef _WIN32
/// Drop the last character of i_path when isdirectory() reports it as a
/// directory name; other paths are returned as they are.
string	trimbarra(string i_path)
{
	if (!isdirectory(i_path))
		return i_path;
	const size_t datenere=i_path.size()-1;
	return i_path.substr(0, datenere);
}
/// Remove the 4-character long-path prefix (//?/) from paths that have it
/// (see islongpath); other paths are returned as they are.
string	makeshortpath(string i_path)
{
	if (!islongpath(i_path))
		return i_path;
	const size_t senzaprefisso=i_path.size()-4;
	return myright(i_path,senzaprefisso);
}
#endif
/// On Windows, when flaglongpath is set, prefix drive-letter paths (X:/...)
/// that are not already long paths with //?/ .
/// Every other path, and every path on other platforms, is returned unchanged.
string makelongpath(string i_path)
{
#ifdef _WIN32
	if (flaglongpath)
	{
		if (flagdebug3)
			myprintf("00062: makelongpath of %s\n",i_path.c_str());

		if (!iswindowspath(i_path))
		{
			if (flagdebug3)
				myprintf("00065: NOT a iswindowspath |%s|\n",i_path.c_str());
		}
		else
		{
			if (flagdebug3)
				myprintf("00063: it is a windows path |%s|\n",i_path.c_str());

			const bool gialungo=islongpath(i_path);
			if (!gialungo)
			{
				if (flagdebug3)
					myprintf("00064: this is NOT a longpath |%s|\n",i_path.c_str());
				return "//?/"+i_path;
			}
		}
	}
#endif
	if (flagdebug2)
		myprintf("00066: return makelongpath i_path |%s|\n",i_path.c_str());
	return i_path;
}
/// it is not easy, at all, to take *nix free filesystem space
/// Free space, in bytes, of the filesystem holding i_path; 0 on failure.
/// One implementation per platform, selected at compile time:
///   NetBSD          statvfs : f_frsize * f_bfree
///   other BSD       statfs  : f_bavail blocks rescaled with getbsize()
///   Linux/Mac/Sun   statvfs : f_bsize * f_bavail
///   Windows         GetDiskFreeSpaceEx on the drive root or the UNC share
int64_t getfreespace(string i_path)
{
#ifndef _WIN32
	/// Diagnostics only: when the path does not exist, report whether its
	/// parent exists. The path queried below is NOT changed by this block.
	if (i_path!="")
		if (!direxists(i_path))
		{
			myprintf("00067! Path does not exists %Z\n",i_path.c_str());
			
			vector<string> pezzi;
			explode(i_path,'/',pezzi);
			if (pezzi.size()>=2)
			{
				/// rebuild the path without its last component
				string percorso="/";
				for (unsigned int i=1;i<pezzi.size()-1;i++)
					percorso+=pezzi[i]+"/";
				if (percorso!="/")
				{
					
					if (direxists(percorso))
						myprintf("00071: Getting free space for %Z\n",percorso.c_str());
					else
						myprintf("00072! Sorry: cannot find the path heuristically %Z\n",percorso.c_str());
										
				}
			}
		}
#endif

#ifdef BSD

#ifdef __NetBSD__
	if (flagdebug)
		myprintf("26116: NetBSD: getfreespace\n");
    struct statvfs stat;
	if (statvfs(i_path.c_str(), &stat) != 0) 
	{
        myprintf("26122$ NetBSD statvfs KAPUTT\n");
        return 0;
    }
	/// NOTE(review): uses f_bfree here, while the other branches use f_bavail
	return (int64_t)stat.f_frsize * (int64_t)stat.f_bfree;
		
	///return (int64_t)stat.f_bsize * (int64_t)stat.f_bfree;
#else
	if (flagdebug)
		myprintf("00073: BSD: getfreespace\n");
	struct statfs stat;
	if (statfs(i_path.c_str(),&stat)!=0)
		return 0;
	static long blocksize = 0;
	int dummy;
	if (blocksize == 0)
		getbsize(&dummy, &blocksize);
	return  fsbtoblk(stat.f_bavail,stat.f_bsize, blocksize)*1024;
#endif
	
	
#else
#if defined(__linux__) || (defined(__APPLE__) && defined(__MACH__) && (!defined(__ppc__))) || defined(SOLARIS)
	if (flagdebug)
		myprintf("00074: LINUX: getfreespace\n");
	struct statvfs stat;
	if (statvfs(i_path.c_str(),&stat)!=0)
		return 0;
	/// NOTE(review): POSIX expresses f_bavail in f_frsize units; f_bsize is used here - confirm
	return stat.f_bsize * stat.f_bavail;
#endif
#endif
#ifdef _WIN32
	uint64_t spazio=0;
	i_path=makeshortpath(i_path);
	if (iswindowsunc(i_path))
	{
		/// UNC: query the //server/share root
		string mydir=getfirstwindowsuncdir(i_path);
		i_path=mydir;
	}
	BOOL  fResult;
	unsigned __int64 i64FreeBytesToCaller,i64TotalBytes,i64FreeBytes;
	WCHAR  *pszDrive  = NULL, szDrive[4];
	const size_t WCHARBUF = 512;
	wchar_t  wszDest[WCHARBUF];
	MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, i_path.c_str(), -1, wszDest, WCHARBUF);
	pszDrive = wszDest;
	if (i_path[1] == ':')
	{
		/// drive letter path: query the drive root X:\ instead of the full path
		szDrive[0] = pszDrive[0];
		szDrive[1] = ':';
        szDrive[2] = '\\';
        szDrive[3] = '\0';
		pszDrive = szDrive;
	}
	fResult = GetDiskFreeSpaceEx ((LPCTSTR)pszDrive,
                                 (PULARGE_INTEGER)&i64FreeBytesToCaller,
                                 (PULARGE_INTEGER)&i64TotalBytes,
                                 (PULARGE_INTEGER)&i64FreeBytes);
	if (fResult)
		spazio=i64FreeBytes;
	return spazio; // Windows
#endif
	/// reached only on platforms not covered above
	myprintf("00075! WARNING CANNOT GET FREE DISK SPACE!\n");
	return 0;
}
#ifdef unix
/// Print the current errno text through perror(), prefixed by "<where>!<filename>".
/// Nothing is printed when flagquiet is set. i_fileattr is not used for the
/// message (it is only touched to keep the compiler quiet).
void printerr(const char* i_where,const char* filename,int32_t i_fileattr)
{
	if (flagquiet)
		return;
	string dove=i_where;
	const string nome=filename;
	const string risultato=dove+"!"+nome;
	if (i_fileattr==777442)
		dove="COMPILER_BE_QUIET";
	perror(risultato.c_str());
}
#else
/*
	Turn a Windows file-attribute bitmask into a readable list.
	For every known flag set in i_attribute the matching label, followed
	by ';', is appended; labels come out in the fixed (alphabetical) order
	of the table below. Bits that are not in the table are ignored.

	i_attribute  attribute bitmask to decode
	returns      concatenated labels (e.g. "ARCHIVE;HIDDEN;"), or an empty
	             string when no known flag is set
*/
string decodewinattribute(int32_t i_attribute)
{
	// One entry per decoded flag. Four masks are spelled as plain numbers
	// exactly as before (presumably because older SDK headers do not define
	// the corresponding FILE_ATTRIBUTE_* macros - TODO confirm)
	static const struct
	{
		int32_t		mask;
		const char*	label;
	} attributi[]=
	{
		{FILE_ATTRIBUTE_ARCHIVE,				"ARCHIVE;"},
		{FILE_ATTRIBUTE_COMPRESSED,				"COMPRESSED;"},
		{FILE_ATTRIBUTE_DEVICE,					"DEVICE;"},
		{FILE_ATTRIBUTE_DIRECTORY,				"DIRECTORY;"},
		{FILE_ATTRIBUTE_ENCRYPTED,				"ENCRYPTED;"},
		{FILE_ATTRIBUTE_HIDDEN,					"HIDDEN;"},
		{32768,									"INTEGRITY_STREAM;"},		// 0x00008000
		{FILE_ATTRIBUTE_NORMAL,					"NORMAL;"},
		{FILE_ATTRIBUTE_NOT_CONTENT_INDEXED,	"NOT_CONTENT_INDEXED;"},
		{131072,								"NO_SCRUB_DATA;"},			// 0x00020000
		{FILE_ATTRIBUTE_OFFLINE,				"OFFLINE;"},
		{FILE_ATTRIBUTE_READONLY,				"READONLY;"},				// was misspelled "READONY;"
		{4194304,								"RECALL_ON_DATA_ACCESS;"},	// 0x00400000
		{262144,								"RECALL_ON_OPEN;"},			// 0x00040000
		{FILE_ATTRIBUTE_REPARSE_POINT,			"REPARSE_POINT;"},
		{FILE_ATTRIBUTE_SPARSE_FILE,			"SPARSE_FILE;"},
		{FILE_ATTRIBUTE_SYSTEM,					"SYSTEM;"},
		{FILE_ATTRIBUTE_TEMPORARY,				"TEMPORARY;"},
		{FILE_ATTRIBUTE_VIRTUAL,				"VIRTUAL;"}
	};

	string risultato="";
	for (size_t i=0;i<sizeof(attributi)/sizeof(attributi[0]);i++)
		if (i_attribute & attributi[i].mask)
			risultato+=attributi[i].label;
	return risultato;
}
string	decodewinerror(DWORD	i_error,const char* i_filename)
{
	string 	risultato	="";
	char buffer[100];

	if (i_error==0L) risultato="ERROR_SUCCESS";
	else
	if (i_error==1L) risultato="ERROR_INVALID_FUNCTION";
	else
	if (i_error==2L) risultato="ERROR_FILE_NOT_FOUND";
	else
	if (i_error==3L)
	{
			risultato="ERROR_PATH_NOT_FOUND";
			if (i_filename)
				if (strlen(i_filename)>255)
				{
					snprintf(buffer,sizeof(buffer),"%08d",(int)strlen(i_filename));
					string lunghezza=buffer;
					risultato+=" : maybe length "+lunghezza+" >255?";
					g_255++;
				}
	}
	else
	if (i_error==4L) risultato="ERROR_TOO_MANY_OPEN_FILES";
	else
	if (i_error==5L) risultato="ERROR_ACCESS_DENIED";
	else
	if (i_error==6L) risultato="ERROR_INVALID_HANDLE";
	else
	if (i_error==7L) risultato="ERROR_ARENA_TRASHED";
	else
	if (i_error==8L) risultato="ERROR_NOT_ENOUGH_MEMORY";
	else
	if (i_error==9L) risultato="ERROR_INVALID_BLOCK";
	else
	if (i_error==10L) risultato="ERROR_BAD_ENVIRONMENT";
	else
	if (i_error==11L) risultato="ERROR_BAD_FORMAT";
	else
	if (i_error==12L) risultato="ERROR_INVALID_ACCESS";
	else
	if (i_error==13L) risultato="ERROR_INVALID_DATA";
	else
	if (i_error==14L) risultato="ERROR_OUTOFMEMORY";
	else
	if (i_error==15L) risultato="ERROR_INVALID_DRIVE";
	else
	if (i_error==16L) risultato="ERROR_CURRENT_DIRECTORY";
	else
	if (i_error==17L) risultato="ERROR_NOT_SAME_DEVICE";
	else
	if (i_error==18L) risultato="ERROR_NO_MORE_FILES";
	else
	if (i_error==19L) risultato="ERROR_WRITE_PROTECT";
	else
	if (i_error==20L) risultato="ERROR_BAD_UNIT";
	else
	if (i_error==21L) risultato="ERROR_NOT_READY";
	else
	if (i_error==22L) risultato="ERROR_BAD_COMMAND";
	else
	if (i_error==23L) risultato="ERROR_CRC";
	else
	if (i_error==24L) risultato="ERROR_BAD_LENGTH";
	else
	if (i_error==25L) risultato="ERROR_SEEK";
	else
	if (i_error==26L) risultato="ERROR_NOT_DOS_DISK";
	else
	if (i_error==27L) risultato="ERROR_SECTOR_NOT_FOUND";
	else
	if (i_error==28L) risultato="ERROR_OUT_OF_PAPER";
	else
	if (i_error==29L) risultato="ERROR_WRITE_FAULT";
	else
	if (i_error==30L) risultato="ERROR_READ_FAULT";
	else
	if (i_error==31L) risultato="ERROR_GEN_FAILURE";
	else
	if (i_error==32L) risultato="ERROR_SHARING_VIOLATION";
	else
	if (i_error==33L) risultato="ERROR_LOCK_VIOLATION";
	else
	if (i_error==34L) risultato="ERROR_WRONG_DISK";
	else
	if (i_error==36L) risultato="ERROR_SHARING_BUFFER_EXCEEDED";
	else
	if (i_error==38L) risultato="ERROR_HANDLE_EOF";
	else
	if (i_error==39L) risultato="ERROR_HANDLE_DISK_FULL";
	else
	if (i_error==50L) risultato="ERROR_NOT_SUPPORTED";
	else
	if (i_error==51L) risultato="ERROR_REM_NOT_LIST";
	else
	if (i_error==52L) risultato="ERROR_DUP_NAME";
	else
	if (i_error==53L) risultato="ERROR_BAD_NETPATH";
	else
	if (i_error==54L) risultato="ERROR_NETWORK_BUSY";
	else
	if (i_error==55L) risultato="ERROR_DEV_NOT_EXIST";
	else
	if (i_error==56L) risultato="ERROR_TOO_MANY_CMDS";
	else
	if (i_error==57L) risultato="ERROR_ADAP_HDW_ERR";
	else
	if (i_error==58L) risultato="ERROR_BAD_NET_RESP";
	else
	if (i_error==59L) risultato="ERROR_UNEXP_NET_ERR";
	else
	if (i_error==60L) risultato="ERROR_BAD_REM_ADAP";
	else
	if (i_error==61L) risultato="ERROR_PRINTQ_FULL";
	else
	if (i_error==62L) risultato="ERROR_NO_SPOOL_SPACE";
	else
	if (i_error==63L) risultato="ERROR_PRINT_CANCELLED";
	else
	if (i_error==64L) risultato="ERROR_NETNAME_DELETED";
	else
	if (i_error==65L) risultato="ERROR_NETWORK_ACCESS_DENIED";
	else
	if (i_error==66L) risultato="ERROR_BAD_DEV_TYPE";
	else
	if (i_error==67L) risultato="ERROR_BAD_NET_NAME";
	else
	if (i_error==68L) risultato="ERROR_TOO_MANY_NAMES";
	else
	if (i_error==69L) risultato="ERROR_TOO_MANY_SESS";
	else
	if (i_error==70L) risultato="ERROR_SHARING_PAUSED";
	else
	if (i_error==71L) risultato="ERROR_REQ_NOT_ACCEP";
	else
	if (i_error==72L) risultato="ERROR_REDIR_PAUSED";
	else
	if (i_error==80L) risultato="ERROR_FILE_EXISTS";
	else
	if (i_error==82L) risultato="ERROR_CANNOT_MAKE";
	else
	if (i_error==83L) risultato="ERROR_FAIL_I24";
	else
	if (i_error==84L) risultato="ERROR_OUT_OF_STRUCTURES";
	else
	if (i_error==85L) risultato="ERROR_ALREADY_ASSIGNED";
	else
	if (i_error==86L) risultato="ERROR_INVALID_PASSWORD";
	else
	if (i_error==87L) risultato="ERROR_INVALID_PARAMETER";
	else
	if (i_error==88L) risultato="ERROR_NET_WRITE_FAULT";
	else
	if (i_error==89L) risultato="ERROR_NO_PROC_SLOTS";
	else
	if (i_error==100L) risultato="ERROR_TOO_MANY_SEMAPHORES";
	else
	if (i_error==101L) risultato="ERROR_EXCL_SEM_ALREADY_OWNED";
	else
	if (i_error==102L) risultato="ERROR_SEM_IS_SET";
	else
	if (i_error==103L) risultato="ERROR_TOO_MANY_SEM_REQUESTS";
	else
	if (i_error==104L) risultato="ERROR_INVALID_AT_INTERRUPT_TIME";
	else
	if (i_error==105L) risultato="ERROR_SEM_OWNER_DIED";
	else
	if (i_error==106L) risultato="ERROR_SEM_USER_LIMIT";
	else
	if (i_error==107L) risultato="ERROR_DISK_CHANGE";
	else
	if (i_error==108L) risultato="ERROR_DRIVE_LOCKED";
	else
	if (i_error==109L) risultato="ERROR_BROKEN_PIPE";
	else
	if (i_error==110L) risultato="ERROR_OPEN_FAILED";
	else
	if (i_error==111L) risultato="ERROR_BUFFER_OVERFLOW";
	else
	if (i_error==112L) risultato="ERROR_DISK_FULL";
	else
	if (i_error==113L) risultato="ERROR_NO_MORE_SEARCH_HANDLES";
	else
	if (i_error==114L) risultato="ERROR_INVALID_TARGET_HANDLE";
	else
	if (i_error==117L) risultato="ERROR_INVALID_CATEGORY";
	else
	if (i_error==118L) risultato="ERROR_INVALID_VERIFY_SWITCH";
	else
	if (i_error==119L) risultato="ERROR_BAD_DRIVER_LEVEL";
	else
	if (i_error==120L) risultato="ERROR_CALL_NOT_IMPLEMENTED";
	else
	if (i_error==121L) risultato="ERROR_SEM_TIMEOUT";
	else
	if (i_error==122L) risultato="ERROR_INSUFFICIENT_BUFFER";
	else
	if (i_error==123L) risultato="ERROR_INVALID_NAME";
	else
	if (i_error==124L) risultato="ERROR_INVALID_LEVEL";
	else
	if (i_error==125L) risultato="ERROR_NO_VOLUME_LABEL";
	else
	if (i_error==126L) risultato="ERROR_MOD_NOT_FOUND";
	else
	if (i_error==127L) risultato="ERROR_PROC_NOT_FOUND";
	else
	if (i_error==128L) risultato="ERROR_WAIT_NO_CHILDREN";
	else
	if (i_error==129L) risultato="ERROR_CHILD_NOT_COMPLETE";
	else
	if (i_error==130L) risultato="ERROR_DIRECT_ACCESS_HANDLE";
	else
	if (i_error==131L) risultato="ERROR_NEGATIVE_SEEK";
	else
	if (i_error==132L) risultato="ERROR_SEEK_ON_DEVICE";
	else
	if (i_error==133L) risultato="ERROR_IS_JOIN_TARGET";
	else
	if (i_error==134L) risultato="ERROR_IS_JOINED";
	else
	if (i_error==135L) risultato="ERROR_IS_SUBSTED";
	else
	if (i_error==136L) risultato="ERROR_NOT_JOINED";
	else
	if (i_error==137L) risultato="ERROR_NOT_SUBSTED";
	else
	if (i_error==138L) risultato="ERROR_JOIN_TO_JOIN";
	else
	if (i_error==139L) risultato="ERROR_SUBST_TO_SUBST";
	else
	if (i_error==140L) risultato="ERROR_JOIN_TO_SUBST";
	else
	if (i_error==141L) risultato="ERROR_SUBST_TO_JOIN";
	else
	if (i_error==142L) risultato="ERROR_BUSY_DRIVE";
	else
	if (i_error==143L) risultato="ERROR_SAME_DRIVE";
	else
	if (i_error==144L) risultato="ERROR_DIR_NOT_ROOT";
	else
	if (i_error==145L) risultato="ERROR_DIR_NOT_EMPTY";
	else
	if (i_error==146L) risultato="ERROR_IS_SUBST_PATH";
	else
	if (i_error==147L) risultato="ERROR_IS_JOIN_PATH";
	else
	if (i_error==148L) risultato="ERROR_PATH_BUSY";
	else
	if (i_error==149L) risultato="ERROR_IS_SUBST_TARGET";
	else
	if (i_error==150L) risultato="ERROR_SYSTEM_TRACE";
	else
	if (i_error==151L) risultato="ERROR_INVALID_EVENT_COUNT";
	else
	if (i_error==152L) risultato="ERROR_TOO_MANY_MUXWAITERS";
	else
	if (i_error==153L) risultato="ERROR_INVALID_LISTFORMAT";
	else
	if (i_error==154L) risultato="ERROR_LABEL_TOO_LONG";
	else
	if (i_error==155L) risultato="ERROR_TOO_MANY_TCBS";
	else
	if (i_error==156L) risultato="ERROR_SIGNAL_REFUSED";
	else
	if (i_error==157L) risultato="ERROR_DISCARDED";
	else
	if (i_error==158L) risultato="ERROR_NOT_LOCKED";
	else
	if (i_error==159L) risultato="ERROR_BAD_THREADID_ADDR";
	else
	if (i_error==160L) risultato="ERROR_BAD_ARGUMENTS";
	else
	if (i_error==161L) risultato="ERROR_BAD_PATHNAME";
	else
	if (i_error==162L) risultato="ERROR_SIGNAL_PENDING";
	else
	if (i_error==164L) risultato="ERROR_MAX_THRDS_REACHED";
	else
	if (i_error==167L) risultato="ERROR_LOCK_FAILED";
	else
	if (i_error==170L) risultato="ERROR_BUSY";
	else
	if (i_error==171L) risultato="ERROR_DEVICE_SUPPORT_IN_PROGRESS";
	else
	if (i_error==173L) risultato="ERROR_CANCEL_VIOLATION";
	else
	if (i_error==174L) risultato="ERROR_ATOMIC_LOCKS_NOT_SUPPORTED";
	else
	if (i_error==180L) risultato="ERROR_INVALID_SEGMENT_NUMBER";
	else
	if (i_error==182L) risultato="ERROR_INVALID_ORDINAL";
	else
	if (i_error==183L) risultato="ERROR_ALREADY_EXISTS";
	else
	if (i_error==186L) risultato="ERROR_INVALID_FLAG_NUMBER";
	else
	if (i_error==187L) risultato="ERROR_SEM_NOT_FOUND";
	else
	if (i_error==188L) risultato="ERROR_INVALID_STARTING_CODESEG";
	else
	if (i_error==189L) risultato="ERROR_INVALID_STACKSEG";
	else
	if (i_error==190L) risultato="ERROR_INVALID_MODULETYPE";
	else
	if (i_error==191L) risultato="ERROR_INVALID_EXE_SIGNATURE";
	else
	if (i_error==192L) risultato="ERROR_EXE_MARKED_INVALID";
	else
	if (i_error==193L) risultato="ERROR_BAD_EXE_FORMAT";
	else
	if (i_error==194L) risultato="ERROR_ITERATED_DATA_EXCEEDS_64k";
	else
	if (i_error==195L) risultato="ERROR_INVALID_MINALLOCSIZE";
	else
	if (i_error==196L) risultato="ERROR_DYNLINK_FROM_INVALID_RING";
	else
	if (i_error==197L) risultato="ERROR_IOPL_NOT_ENABLED";
	else
	if (i_error==198L) risultato="ERROR_INVALID_SEGDPL";
	else
	if (i_error==199L) risultato="ERROR_AUTODATASEG_EXCEEDS_64k";
	else
	if (i_error==200L) risultato="ERROR_RING2SEG_MUST_BE_MOVABLE";
	else
	if (i_error==201L) risultato="ERROR_RELOC_CHAIN_XEEDS_SEGLIM";
	else
	if (i_error==202L) risultato="ERROR_INFLOOP_IN_RELOC_CHAIN";
	else
	if (i_error==203L) risultato="ERROR_ENVVAR_NOT_FOUND";
	else
	if (i_error==205L) risultato="ERROR_NO_SIGNAL_SENT";
	else
	if (i_error==206L) risultato="ERROR_FILENAME_EXCED_RANGE";
	else
	if (i_error==207L) risultato="ERROR_RING2_STACK_IN_USE";
	else
	if (i_error==208L) risultato="ERROR_META_EXPANSION_TOO_LONG";
	else
	if (i_error==209L) risultato="ERROR_INVALID_SIGNAL_NUMBER";
	else
	if (i_error==210L) risultato="ERROR_THREAD_1_INACTIVE";
	else
	if (i_error==212L) risultato="ERROR_LOCKED";
	else
	if (i_error==214L) risultato="ERROR_TOO_MANY_MODULES";
	else
	if (i_error==215L) risultato="ERROR_NESTING_NOT_ALLOWED";
	else
	if (i_error==216L) risultato="ERROR_EXE_MACHINE_TYPE_MISMATCH";
	else
	if (i_error==217L) risultato="ERROR_EXE_CANNOT_MODIFY_SIGNED_BINARY";
	else
	if (i_error==218L) risultato="ERROR_EXE_CANNOT_MODIFY_STRONG_SIGNED_BINARY";
	else
	if (i_error==220L) risultato="ERROR_FILE_CHECKED_OUT";
	else
	if (i_error==221L) risultato="ERROR_CHECKOUT_REQUIRED";
	else
	if (i_error==222L) risultato="ERROR_BAD_FILE_TYPE";
	else
	if (i_error==223L) risultato="ERROR_FILE_TOO_LARGE";
	else
	if (i_error==224L) risultato="ERROR_FORMS_AUTH_REQUIRED";
	else
	if (i_error==225L) risultato="ERROR_VIRUS_INFECTED";
	else
	if (i_error==226L) risultato="ERROR_VIRUS_DELETED";
	else
	if (i_error==229L) risultato="ERROR_PIPE_LOCAL";
	else
	if (i_error==230L) risultato="ERROR_BAD_PIPE";
	else
	if (i_error==231L) risultato="ERROR_PIPE_BUSY";
	else
	if (i_error==232L) risultato="ERROR_NO_DATA";
	else
	if (i_error==233L) risultato="ERROR_PIPE_NOT_CONNECTED";
	else
	if (i_error==234L) risultato="ERROR_MORE_DATA";
	else
	if (i_error==235L) risultato="ERROR_NO_WORK_DONE";
	else
	if (i_error==240L) risultato="ERROR_VC_DISCONNECTED";
	else
	if (i_error==254L) risultato="ERROR_INVALID_EA_NAME";
	else
	if (i_error==255L) risultato="ERROR_EA_LISTINCONSISTENT";
	else
	if (i_error==258L) risultato="WAIT_TIMEOUT";
	else
	if (i_error==259L) risultato="ERROR_NO_MORE_ITEMS";
	else
	if (i_error==266L) risultato="ERROR_CANNOT_COPY";
	else
	if (i_error==267L) risultato="ERROR_DIRECTORY";
	else
	if (i_error==275L) risultato="ERROR_EAS_DIDNT_FIT";
	else
	if (i_error==276L) risultato="ERROR_EA_FILE_CORRUPT";
	else
	if (i_error==277L) risultato="ERROR_EA_TABLE_FULL";
	else
	if (i_error==278L) risultato="ERROR_INVALID_EA_HANDLE";
	else
	if (i_error==282L) risultato="ERROR_EAS_NOT_SUPPORTED";
	else
	if (i_error==288L) risultato="ERROR_NOT_OWNER";
	else
	if (i_error==298L) risultato="ERROR_TOO_MANY_POSTS";
	else
	if (i_error==299L) risultato="ERROR_PARTIAL_COPY";
	else
	if (i_error==300L) risultato="ERROR_OPLOCK_NOT_GRANTED";
	else
	if (i_error==301L) risultato="ERROR_INVALID_OPLOCK_PROTOCOL";
	else
	if (i_error==302L) risultato="ERROR_DISK_TOO_FRAGMENTED";
	else
	if (i_error==303L) risultato="ERROR_DELETE_PENDING";
	else
	if (i_error==304L) risultato="ERROR_INCOMPATIBLE_WITH_GLOBAL_SHORT_NAME_REGISTRY_SETTING";
	else
	if (i_error==305L) risultato="ERROR_SHORT_NAMES_NOT_ENABLED_ON_VOLUME";
	else
	if (i_error==306L) risultato="ERROR_SECURITY_STREAM_IS_INCONSISTENT";
	else
	if (i_error==307L) risultato="ERROR_INVALID_LOCK_RANGE";
	else
	if (i_error==308L) risultato="ERROR_IMAGE_SUBSYSTEM_NOT_PRESENT";
	else
	if (i_error==309L) risultato="ERROR_NOTIFICATION_GUID_ALREADY_DEFINED";
	else
	if (i_error==310L) risultato="ERROR_INVALID_EXCEPTION_HANDLER";
	else
	if (i_error==311L) risultato="ERROR_DUPLICATE_PRIVILEGES";
	else
	if (i_error==312L) risultato="ERROR_NO_RANGES_PROCESSED";
	else
	if (i_error==313L) risultato="ERROR_NOT_ALLOWED_ON_SYSTEM_FILE";
	else
	if (i_error==314L) risultato="ERROR_DISK_RESOURCES_EXHAUSTED";
	else
	if (i_error==315L) risultato="ERROR_INVALID_TOKEN";
	else
	if (i_error==316L) risultato="ERROR_DEVICE_FEATURE_NOT_SUPPORTED";
	else
	if (i_error==317L) risultato="ERROR_MR_MID_NOT_FOUND";
	else
	if (i_error==318L) risultato="ERROR_SCOPE_NOT_FOUND";
	else
	if (i_error==319L) risultato="ERROR_UNDEFINED_SCOPE";
	else
	if (i_error==320L) risultato="ERROR_INVALID_CAP";
	else
	if (i_error==321L) risultato="ERROR_DEVICE_UNREACHABLE";
	else
	if (i_error==322L) risultato="ERROR_DEVICE_NO_RESOURCES";
	else
	if (i_error==323L) risultato="ERROR_DATA_CHECKSUM_ERROR";
	else
	if (i_error==324L) risultato="ERROR_INTERMIXED_KERNEL_EA_OPERATION";
	else
	if (i_error==326L) risultato="ERROR_FILE_LEVEL_TRIM_NOT_SUPPORTED";
	else
	if (i_error==327L) risultato="ERROR_OFFSET_ALIGNMENT_VIOLATION";
	else
	if (i_error==328L) risultato="ERROR_INVALID_FIELD_IN_PARAMETER_LIST";
	else
	if (i_error==329L) risultato="ERROR_OPERATION_IN_PROGRESS";
	else
	if (i_error==330L) risultato="ERROR_BAD_DEVICE_PATH";
	else
	if (i_error==331L) risultato="ERROR_TOO_MANY_DESCRIPTORS";
	else
	if (i_error==332L) risultato="ERROR_SCRUB_DATA_DISABLED";
	else
	if (i_error==333L) risultato="ERROR_NOT_REDUNDANT_STORAGE";
	else
	if (i_error==334L) risultato="ERROR_RESIDENT_FILE_NOT_SUPPORTED";
	else
	if (i_error==335L) risultato="ERROR_COMPRESSED_FILE_NOT_SUPPORTED";
	else
	if (i_error==336L) risultato="ERROR_DIRECTORY_NOT_SUPPORTED";
	else
	if (i_error==337L) risultato="ERROR_NOT_READ_FROM_COPY";
	else
	if (i_error==338L) risultato="ERROR_FT_WRITE_FAILURE";
	else
	if (i_error==339L) risultato="ERROR_FT_DI_SCAN_REQUIRED";
	else
	if (i_error==340L) risultato="ERROR_INVALID_KERNEL_INFO_VERSION";
	else
	if (i_error==341L) risultato="ERROR_INVALID_PEP_INFO_VERSION";
	else
	if (i_error==342L) risultato="ERROR_OBJECT_NOT_EXTERNALLY_BACKED";
	else
	if (i_error==343L) risultato="ERROR_EXTERNAL_BACKING_PROVIDER_UNKNOWN";
	else
	if (i_error==344L) risultato="ERROR_COMPRESSION_NOT_BENEFICIAL";
	else
	if (i_error==345L) risultato="ERROR_STORAGE_TOPOLOGY_ID_MISMATCH";
	else
	if (i_error==346L) risultato="ERROR_BLOCKED_BY_PARENTAL_CONTROLS";
	else
	if (i_error==347L) risultato="ERROR_BLOCK_TOO_MANY_REFERENCES";
	else
	if (i_error==348L) risultato="ERROR_MARKED_TO_DISALLOW_WRITES";
	else
	if (i_error==349L) risultato="ERROR_ENCLAVE_FAILURE";
	else
	if (i_error==350L) risultato="ERROR_FAIL_NOACTION_REBOOT";
	else
	if (i_error==351L) risultato="ERROR_FAIL_SHUTDOWN";
	else
	if (i_error==352L) risultato="ERROR_FAIL_RESTART";
	else
	if (i_error==353L) risultato="ERROR_MAX_SESSIONS_REACHED";
	else
	if (i_error==354L) risultato="ERROR_NETWORK_ACCESS_DENIED_EDP";
	else
	if (i_error==355L) risultato="ERROR_DEVICE_HINT_NAME_BUFFER_TOO_SMALL";
	else
	if (i_error==356L) risultato="ERROR_EDP_POLICY_DENIES_OPERATION";
	else
	if (i_error==357L) risultato="ERROR_EDP_DPL_POLICY_CANT_BE_SATISFIED";
	else
	if (i_error==358L) risultato="ERROR_CLOUD_FILE_PROVIDER_UNKNOWN";
	else
	if (i_error==359L) risultato="ERROR_DEVICE_IN_MAINTENANCE";
	else
	if (i_error==360L) risultato="ERROR_NOT_SUPPORTED_ON_DAX";
	else
	if (i_error==361L) risultato="ERROR_DAX_MAPPING_EXISTS";
	else
	if (i_error==362L) risultato="ERROR_CLOUD_FILE_PROVIDER_NOT_RUNNING";
	else
	if (i_error==363L) risultato="ERROR_CLOUD_FILE_METADATA_CORRUPT";
	else
	if (i_error==364L) risultato="ERROR_CLOUD_FILE_METADATA_TOO_LARGE";
	else
	if (i_error==365L) risultato="ERROR_CLOUD_FILE_PROPERTY_BLOB_TOO_LARGE";
	else
	if (i_error==366L) risultato="ERROR_CLOUD_FILE_PROPERTY_BLOB_CHECKSUM_MISMATCH";
	else
	if (i_error==367L) risultato="ERROR_CHILD_PROCESS_BLOCKED";
	else
	if (i_error==368L) risultato="ERROR_STORAGE_LOST_DATA_PERSISTENCE";
	else
	if (i_error==400L) risultato="ERROR_THREAD_MODE_ALREADY_BACKGROUND";
	else
	if (i_error==401L) risultato="ERROR_THREAD_MODE_NOT_BACKGROUND";
	else
	if (i_error==402L) risultato="ERROR_PROCESS_MODE_ALREADY_BACKGROUND";
	else
	if (i_error==403L) risultato="ERROR_PROCESS_MODE_NOT_BACKGROUND";
	else
	if (i_error==450L) risultato="ERROR_CAPAUTHZ_NOT_DEVUNLOCKED";
	else
	if (i_error==451L) risultato="ERROR_CAPAUTHZ_CHANGE_TYPE";
	else
	if (i_error==452L) risultato="ERROR_CAPAUTHZ_NOT_PROVISIONED";
	else
	if (i_error==453L) risultato="ERROR_CAPAUTHZ_NOT_AUTHORIZED";
	else
	if (i_error==454L) risultato="ERROR_CAPAUTHZ_NO_POLICY";
	else
	if (i_error==455L) risultato="ERROR_CAPAUTHZ_DB_CORRUPTED";
	else
	if (i_error==483L) risultato="ERROR_DEVICE_HARDWARE_ERROR";
	else
	if (i_error==487L) risultato="ERROR_INVALID_ADDRESS";
	else
	if (i_error==500L) risultato="ERROR_USER_PROFILE_LOAD";
	else
	if (i_error==534L) risultato="ERROR_ARITHMETIC_OVERFLOW";
	else
	if (i_error==535L) risultato="ERROR_PIPE_CONNECTED";
	else
	if (i_error==536L) risultato="ERROR_PIPE_LISTENING";
	else
	if (i_error==537L) risultato="ERROR_VERIFIER_STOP";
	else
	if (i_error==538L) risultato="ERROR_ABIOS_ERROR";
	else
	if (i_error==539L) risultato="ERROR_WX86_WARNING";
	else
	if (i_error==540L) risultato="ERROR_WX86_ERROR";
	else
	if (i_error==541L) risultato="ERROR_TIMER_NOT_CANCELED";
	else
	if (i_error==542L) risultato="ERROR_UNWIND";
	else
	if (i_error==543L) risultato="ERROR_BAD_STACK";
	else
	if (i_error==544L) risultato="ERROR_INVALID_UNWIND_TARGET";
	else
	if (i_error==545L) risultato="ERROR_INVALID_PORT_ATTRIBUTES";
	else
	if (i_error==546L) risultato="ERROR_PORT_MESSAGE_TOO_LONG";
	else
	if (i_error==547L) risultato="ERROR_INVALID_QUOTA_LOWER";
	else
	if (i_error==548L) risultato="ERROR_DEVICE_ALREADY_ATTACHED";
	else
	if (i_error==549L) risultato="ERROR_INSTRUCTION_MISALIGNMENT";
	else
	if (i_error==550L) risultato="ERROR_PROFILING_NOT_STARTED";
	else
	if (i_error==551L) risultato="ERROR_PROFILING_NOT_STOPPED";
	else
	if (i_error==552L) risultato="ERROR_COULD_NOT_INTERPRET";
	else
	if (i_error==553L) risultato="ERROR_PROFILING_AT_LIMIT";
	else
	if (i_error==554L) risultato="ERROR_CANT_WAIT";
	else
	if (i_error==555L) risultato="ERROR_CANT_TERMINATE_SELF";
	else
	if (i_error==556L) risultato="ERROR_UNEXPECTED_MM_CREATE_ERR";
	else
	if (i_error==557L) risultato="ERROR_UNEXPECTED_MM_MAP_ERROR";
	else
	if (i_error==558L) risultato="ERROR_UNEXPECTED_MM_EXTEND_ERR";
	else
	if (i_error==559L) risultato="ERROR_BAD_FUNCTION_TABLE";
	else
	if (i_error==560L) risultato="ERROR_NO_GUID_TRANSLATION";
	else
	if (i_error==561L) risultato="ERROR_INVALID_LDT_SIZE";
	else
	if (i_error==563L) risultato="ERROR_INVALID_LDT_OFFSET";
	else
	if (i_error==564L) risultato="ERROR_INVALID_LDT_DESCRIPTOR";
	else
	if (i_error==565L) risultato="ERROR_TOO_MANY_THREADS";
	else
	if (i_error==566L) risultato="ERROR_THREAD_NOT_IN_PROCESS";
	else
	if (i_error==567L) risultato="ERROR_PAGEFILE_QUOTA_EXCEEDED";
	else
	if (i_error==568L) risultato="ERROR_LOGON_SERVER_CONFLICT";
	else
	if (i_error==569L) risultato="ERROR_SYNCHRONIZATION_REQUIRED";
	else
	if (i_error==570L) risultato="ERROR_NET_OPEN_FAILED";
	else
	if (i_error==571L) risultato="ERROR_IO_PRIVILEGE_FAILED";
	else
	if (i_error==572L) risultato="ERROR_CONTROL_C_EXIT";
	else
	if (i_error==573L) risultato="ERROR_MISSING_SYSTEMFILE";
	else
	if (i_error==574L) risultato="ERROR_UNHANDLED_EXCEPTION";
	else
	if (i_error==575L) risultato="ERROR_APP_INIT_FAILURE";
	else
	if (i_error==576L) risultato="ERROR_PAGEFILE_CREATE_FAILED";
	else
	if (i_error==577L) risultato="ERROR_INVALID_IMAGE_HASH";
	else
	if (i_error==578L) risultato="ERROR_NO_PAGEFILE";
	else
	if (i_error==579L) risultato="ERROR_ILLEGAL_FLOAT_CONTEXT";
	else
	if (i_error==580L) risultato="ERROR_NO_EVENT_PAIR";
	else
	if (i_error==581L) risultato="ERROR_DOMAIN_CTRLR_CONFIG_ERROR";
	else
	if (i_error==582L) risultato="ERROR_ILLEGAL_CHARACTER";
	else
	if (i_error==583L) risultato="ERROR_UNDEFINED_CHARACTER";
	else
	if (i_error==584L) risultato="ERROR_FLOPPY_VOLUME";
	else
	if (i_error==585L) risultato="ERROR_BIOS_FAILED_TO_CONNECT_INTERRUPT";
	else
	if (i_error==586L) risultato="ERROR_BACKUP_CONTROLLER";
	else
	if (i_error==587L) risultato="ERROR_MUTANT_LIMIT_EXCEEDED";
	else
	if (i_error==588L) risultato="ERROR_FS_DRIVER_REQUIRED";
	else
	if (i_error==589L) risultato="ERROR_CANNOT_LOAD_REGISTRY_FILE";
	else
	if (i_error==590L) risultato="ERROR_DEBUG_ATTACH_FAILED";
	else
	if (i_error==591L) risultato="ERROR_SYSTEM_PROCESS_TERMINATED";
	else
	if (i_error==592L) risultato="ERROR_DATA_NOT_ACCEPTED";
	else
	if (i_error==593L) risultato="ERROR_VDM_HARD_ERROR";
	else
	if (i_error==594L) risultato="ERROR_DRIVER_CANCEL_TIMEOUT";
	else
	if (i_error==595L) risultato="ERROR_REPLY_MESSAGE_MISMATCH";
	else
	if (i_error==596L) risultato="ERROR_LOST_WRITEBEHIND_DATA";
	else
	if (i_error==597L) risultato="ERROR_CLIENT_SERVER_PARAMETERS_INVALID";
	else
	if (i_error==598L) risultato="ERROR_NOT_TINY_STREAM";
	else
	if (i_error==599L) risultato="ERROR_STACK_OVERFLOW_READ";
	else
	if (i_error==600L) risultato="ERROR_CONVERT_TO_LARGE";
	else
	if (i_error==601L) risultato="ERROR_FOUND_OUT_OF_SCOPE";
	else
	if (i_error==602L) risultato="ERROR_ALLOCATE_BUCKET";
	else
	if (i_error==603L) risultato="ERROR_MARSHALL_OVERFLOW";
	else
	if (i_error==604L) risultato="ERROR_INVALID_VARIANT";
	else
	if (i_error==605L) risultato="ERROR_BAD_COMPRESSION_BUFFER";
	else
	if (i_error==606L) risultato="ERROR_AUDIT_FAILED";
	else
	if (i_error==607L) risultato="ERROR_TIMER_RESOLUTION_NOT_SET";
	else
	if (i_error==608L) risultato="ERROR_INSUFFICIENT_LOGON_INFO";
	else
	if (i_error==609L) risultato="ERROR_BAD_DLL_ENTRYPOINT";
	else
	if (i_error==610L) risultato="ERROR_BAD_SERVICE_ENTRYPOINT";
	else
	if (i_error==611L) risultato="ERROR_IP_ADDRESS_CONFLICT1";
	else
	if (i_error==612L) risultato="ERROR_IP_ADDRESS_CONFLICT2";
	else
	if (i_error==613L) risultato="ERROR_REGISTRY_QUOTA_LIMIT";
	else
	if (i_error==614L) risultato="ERROR_NO_CALLBACK_ACTIVE";
	else
	if (i_error==615L) risultato="ERROR_PWD_TOO_SHORT";
	else
	if (i_error==616L) risultato="ERROR_PWD_TOO_RECENT";
	else
	if (i_error==617L) risultato="ERROR_PWD_HISTORY_CONFLICT";
	else
	if (i_error==618L) risultato="ERROR_UNSUPPORTED_COMPRESSION";
	else
	if (i_error==619L) risultato="ERROR_INVALID_HW_PROFILE";
	else
	if (i_error==620L) risultato="ERROR_INVALID_PLUGPLAY_DEVICE_PATH";
	else
	if (i_error==621L) risultato="ERROR_QUOTA_LISTINCONSISTENT";
	else
	if (i_error==622L) risultato="ERROR_EVALUATION_EXPIRATION";
	else
	if (i_error==623L) risultato="ERROR_ILLEGAL_DLL_RELOCATION";
	else
	if (i_error==624L) risultato="ERROR_DLL_INIT_FAILED_LOGOFF";
	else
	if (i_error==625L) risultato="ERROR_VALIDATE_CONTINUE";
	else
	if (i_error==626L) risultato="ERROR_NO_MORE_MATCHES";
	else
	if (i_error==627L) risultato="ERROR_RANGE_LISTCONFLICT";
	else
	if (i_error==628L) risultato="ERROR_SERVER_SID_MISMATCH";
	else
	if (i_error==629L) risultato="ERROR_CANT_ENABLE_DENY_ONLY";
	else
	if (i_error==630L) risultato="ERROR_FLOAT_MULTIPLE_FAULTS";
	else
	if (i_error==631L) risultato="ERROR_FLOAT_MULTIPLE_TRAPS";
	else
	if (i_error==632L) risultato="ERROR_NOINTERFACE";
	else
	if (i_error==633L) risultato="ERROR_DRIVER_FAILED_SLEEP";
	else
	if (i_error==634L) risultato="ERROR_CORRUPT_SYSTEM_FILE";
	else
	if (i_error==635L) risultato="ERROR_COMMITMENT_MINIMUM";
	else
	if (i_error==636L) risultato="ERROR_PNP_RESTART_ENUMERATION";
	else
	if (i_error==637L) risultato="ERROR_SYSTEM_IMAGE_BAD_SIGNATURE";
	else
	if (i_error==638L) risultato="ERROR_PNP_REBOOT_REQUIRED";
	else
	if (i_error==639L) risultato="ERROR_INSUFFICIENT_POWER";
	else
	if (i_error==640L) risultato="ERROR_MULTIPLE_FAULT_VIOLATION";
	else
	if (i_error==641L) risultato="ERROR_SYSTEM_SHUTDOWN";
	else
	if (i_error==642L) risultato="ERROR_PORT_NOT_SET";
	else
	if (i_error==643L) risultato="ERROR_DS_VERSION_CHECK_FAILURE";
	else
	if (i_error==644L) risultato="ERROR_RANGE_NOT_FOUND";
	else
	if (i_error==646L) risultato="ERROR_NOT_SAFE_MODE_DRIVER";
	else
	if (i_error==647L) risultato="ERROR_FAILED_DRIVER_ENTRY";
	else
	if (i_error==648L) risultato="ERROR_DEVICE_ENUMERATION_ERROR";
	else
	if (i_error==649L) risultato="ERROR_MOUNT_POINT_NOT_RESOLVED";
	else
	if (i_error==650L) risultato="ERROR_INVALID_DEVICE_OBJECT_PARAMETER";
	else
	if (i_error==651L) risultato="ERROR_MCA_OCCURED";
	else
	if (i_error==652L) risultato="ERROR_DRIVER_DATABASE_ERROR";
	else
	if (i_error==653L) risultato="ERROR_SYSTEM_HIVE_TOO_LARGE";
	else
	if (i_error==654L) risultato="ERROR_DRIVER_FAILED_PRIOR_UNLOAD";
	else
	if (i_error==655L) risultato="ERROR_VOLSNAP_PREPARE_HIBERNATE";
	else
	if (i_error==656L) risultato="ERROR_HIBERNATION_FAILURE";
	else
	if (i_error==657L) risultato="ERROR_PWD_TOO_LONG";
	else
	if (i_error==665L) risultato="ERROR_FILE_SYSTEM_LIMITATION";
	else
	if (i_error==668L) risultato="ERROR_ASSERTION_FAILURE";
	else
	if (i_error==669L) risultato="ERROR_ACPI_ERROR";
	else
	if (i_error==670L) risultato="ERROR_WOW_ASSERTION";
	else
	if (i_error==671L) risultato="ERROR_PNP_BAD_MPS_TABLE";
	else
	if (i_error==672L) risultato="ERROR_PNP_TRANSLATION_FAILED";
	else
	if (i_error==673L) risultato="ERROR_PNP_IRQ_TRANSLATION_FAILED";
	else
	if (i_error==674L) risultato="ERROR_PNP_INVALID_ID";
	else
	if (i_error==675L) risultato="ERROR_WAKE_SYSTEM_DEBUGGER";
	else
	if (i_error==676L) risultato="ERROR_HANDLES_CLOSED";
	else
	if (i_error==677L) risultato="ERROR_EXTRANEOUS_INFORMATION";
	else
	if (i_error==678L) risultato="ERROR_RXACT_COMMIT_NECESSARY";
	else
	if (i_error==679L) risultato="ERROR_MEDIA_CHECK";
	else
	if (i_error==680L) risultato="ERROR_GUID_SUBSTITUTION_MADE";
	else
	if (i_error==681L) risultato="ERROR_STOPPED_ON_SYMLINK";
	else
	if (i_error==682L) risultato="ERROR_LONGJUMP";
	else
	if (i_error==683L) risultato="ERROR_PLUGPLAY_QUERY_VETOED";
	else
	if (i_error==684L) risultato="ERROR_UNWIND_CONSOLIDATE";
	else
	if (i_error==685L) risultato="ERROR_REGISTRY_HIVE_RECOVERED";
	else
	if (i_error==686L) risultato="ERROR_DLL_MIGHT_BE_INSECURE";
	else
	if (i_error==687L) risultato="ERROR_DLL_MIGHT_BE_INCOMPATIBLE";
	else
	if (i_error==688L) risultato="ERROR_DBG_EXCEPTION_NOT_HANDLED";
	else
	if (i_error==689L) risultato="ERROR_DBG_REPLY_LATER";
	else
	if (i_error==690L) risultato="ERROR_DBG_UNABLE_TO_PROVIDE_HANDLE";
	else
	if (i_error==691L) risultato="ERROR_DBG_TERMINATE_THREAD";
	else
	if (i_error==692L) risultato="ERROR_DBG_TERMINATE_PROCESS";
	else
	if (i_error==693L) risultato="ERROR_DBG_CONTROL_C";
	else
	if (i_error==694L) risultato="ERROR_DBG_PRINTEXCEPTION_C";
	else
	if (i_error==695L) risultato="ERROR_DBG_RIPEXCEPTION";
	else
	if (i_error==696L) risultato="ERROR_DBG_CONTROL_BREAK";
	else
	if (i_error==697L) risultato="ERROR_DBG_COMMAND_EXCEPTION";
	else
	if (i_error==698L) risultato="ERROR_OBJECT_NAME_EXISTS";
	else
	if (i_error==699L) risultato="ERROR_THREAD_WAS_SUSPENDED";
	else
	if (i_error==700L) risultato="ERROR_IMAGE_NOT_AT_BASE";
	else
	if (i_error==701L) risultato="ERROR_RXACT_STATE_CREATED";
	else
	if (i_error==702L) risultato="ERROR_SEGMENT_NOTIFICATION";
	else
	if (i_error==703L) risultato="ERROR_BAD_CURRENT_DIRECTORY";
	else
	if (i_error==704L) risultato="ERROR_FT_READ_RECOVERY_FROM_BACKUP";
	else
	if (i_error==705L) risultato="ERROR_FT_WRITE_RECOVERY";
	else
	if (i_error==706L) risultato="ERROR_IMAGE_MACHINE_TYPE_MISMATCH";
	else
	if (i_error==707L) risultato="ERROR_RECEIVE_PARTIAL";
	else
	if (i_error==708L) risultato="ERROR_RECEIVE_EXPEDITED";
	else
	if (i_error==709L) risultato="ERROR_RECEIVE_PARTIAL_EXPEDITED";
	else
	if (i_error==710L) risultato="ERROR_EVENT_DONE";
	else
	if (i_error==711L) risultato="ERROR_EVENT_PENDING";
	else
	if (i_error==712L) risultato="ERROR_CHECKING_FILE_SYSTEM";
	else
	if (i_error==713L) risultato="ERROR_FATAL_APP_EXIT";
	else
	if (i_error==714L) risultato="ERROR_PREDEFINED_HANDLE";
	else
	if (i_error==715L) risultato="ERROR_WAS_UNLOCKED";
	else
	if (i_error==716L) risultato="ERROR_SERVICE_NOTIFICATION";
	else
	if (i_error==717L) risultato="ERROR_WAS_LOCKED";
	else
	if (i_error==718L) risultato="ERROR_LOG_HARD_ERROR";
	else
	if (i_error==719L) risultato="ERROR_ALREADY_WIN32";
	else
	if (i_error==720L) risultato="ERROR_IMAGE_MACHINE_TYPE_MISMATCH_EXE";
	else
	if (i_error==721L) risultato="ERROR_NO_YIELD_PERFORMED";
	else
	if (i_error==722L) risultato="ERROR_TIMER_RESUME_IGNORED";
	else
	if (i_error==723L) risultato="ERROR_ARBITRATION_UNHANDLED";
	else
	if (i_error==724L) risultato="ERROR_CARDBUS_NOT_SUPPORTED";
	else
	if (i_error==725L) risultato="ERROR_MP_PROCESSOR_MISMATCH";
	else
	if (i_error==726L) risultato="ERROR_HIBERNATED";
	else
	if (i_error==727L) risultato="ERROR_RESUME_HIBERNATION";
	else
	if (i_error==728L) risultato="ERROR_FIRMWARE_UPDATED";
	else
	if (i_error==729L) risultato="ERROR_DRIVERS_LEAKING_LOCKED_PAGES";
	else
	if (i_error==730L) risultato="ERROR_WAKE_SYSTEM";
	else
	if (i_error==731L) risultato="ERROR_WAIT_1";
	else
	if (i_error==732L) risultato="ERROR_WAIT_2";
	else
	if (i_error==733L) risultato="ERROR_WAIT_3";
	else
	if (i_error==734L) risultato="ERROR_WAIT_63";
	else
	if (i_error==735L) risultato="ERROR_ABANDONED_WAIT_0";
	else
	if (i_error==736L) risultato="ERROR_ABANDONED_WAIT_63";
	else
	if (i_error==737L) risultato="ERROR_USER_APC";
	else
	if (i_error==738L) risultato="ERROR_KERNEL_APC";
	else
	if (i_error==739L) risultato="ERROR_ALERTED";
	else
	if (i_error==740L) risultato="ERROR_ELEVATION_REQUIRED";
	else
	if (i_error==741L) risultato="ERROR_REPARSE";
	else
	if (i_error==742L) risultato="ERROR_OPLOCK_BREAK_IN_PROGRESS";
	else
	if (i_error==743L) risultato="ERROR_VOLUME_MOUNTED";
	else
	if (i_error==744L) risultato="ERROR_RXACT_COMMITTED";
	else
	if (i_error==745L) risultato="ERROR_NOTIFY_CLEANUP";
	else
	if (i_error==746L) risultato="ERROR_PRIMARY_TRANSPORT_CONNECT_FAILED";
	else
	if (i_error==747L) risultato="ERROR_PAGE_FAULT_TRANSITION";
	else
	if (i_error==748L) risultato="ERROR_PAGE_FAULT_DEMAND_ZERO";
	else
	if (i_error==749L) risultato="ERROR_PAGE_FAULT_COPY_ON_WRITE";
	else
	if (i_error==750L) risultato="ERROR_PAGE_FAULT_GUARD_PAGE";
	else
	if (i_error==751L) risultato="ERROR_PAGE_FAULT_PAGING_FILE";
	else
	if (i_error==752L) risultato="ERROR_CACHE_PAGE_LOCKED";
	else
	if (i_error==753L) risultato="ERROR_CRASH_DUMP";
	else
	if (i_error==754L) risultato="ERROR_BUFFER_ALL_ZEROS";
	else
	if (i_error==755L) risultato="ERROR_REPARSE_OBJECT";
	else
	if (i_error==756L) risultato="ERROR_RESOURCE_REQUIREMENTS_CHANGED";
	else
	if (i_error==757L) risultato="ERROR_TRANSLATION_COMPLETE";
	else
	if (i_error==758L) risultato="ERROR_NOTHING_TO_TERMINATE";
	else
	if (i_error==759L) risultato="ERROR_PROCESS_NOT_IN_JOB";
	else
	if (i_error==760L) risultato="ERROR_PROCESS_IN_JOB";
	else
	if (i_error==761L) risultato="ERROR_VOLSNAP_HIBERNATE_READY";
	else
	if (i_error==762L) risultato="ERROR_FSFILTER_OP_COMPLETED_SUCCESSFULLY";
	else
	if (i_error==763L) risultato="ERROR_INTERRUPT_VECTOR_ALREADY_CONNECTED";
	else
	if (i_error==764L) risultato="ERROR_INTERRUPT_STILL_CONNECTED";
	else
	if (i_error==765L) risultato="ERROR_WAIT_FOR_OPLOCK";
	else
	if (i_error==766L) risultato="ERROR_DBG_EXCEPTION_HANDLED";
	else
	if (i_error==767L) risultato="ERROR_DBG_CONTINUE";
	else
	if (i_error==768L) risultato="ERROR_CALLBACK_POP_STACK";
	else
	if (i_error==769L) risultato="ERROR_COMPRESSION_DISABLED";
	else
	if (i_error==770L) risultato="ERROR_CANTFETCHBACKWARDS";
	else
	if (i_error==771L) risultato="ERROR_CANTSCROLLBACKWARDS";
	else
	if (i_error==772L) risultato="ERROR_ROWSNOTRELEASED";
	else
	if (i_error==773L) risultato="ERROR_BAD_ACCESSOR_FLAGS";
	else
	if (i_error==774L) risultato="ERROR_ERRORS_ENCOUNTERED";
	else
	if (i_error==775L) risultato="ERROR_NOT_CAPABLE";
	else
	if (i_error==776L) risultato="ERROR_REQUEST_OUT_OF_SEQUENCE";
	else
	if (i_error==777L) risultato="ERROR_VERSION_PARSE_ERROR";
	else
	if (i_error==778L) risultato="ERROR_BADSTARTPOSITION";
	else
	if (i_error==779L) risultato="ERROR_MEMORY_HARDWARE";
	else
	if (i_error==780L) risultato="ERROR_DISK_REPAIR_DISABLED";
	else
	if (i_error==781L) risultato="ERROR_INSUFFICIENT_RESOURCE_FOR_SPECIFIED_SHARED_SECTION_SIZE";
	else
	if (i_error==782L) risultato="ERROR_SYSTEM_POWERSTATE_TRANSITION";
	else
	if (i_error==783L) risultato="ERROR_SYSTEM_POWERSTATE_COMPLEX_TRANSITION";
	else
	if (i_error==784L) risultato="ERROR_MCA_EXCEPTION";
	else
	if (i_error==785L) risultato="ERROR_ACCESS_AUDIT_BY_POLICY";
	else
	if (i_error==786L) risultato="ERROR_ACCESS_DISABLED_NO_SAFER_UI_BY_POLICY";
	else
	if (i_error==787L) risultato="ERROR_ABANDON_HIBERFILE";
	else
	if (i_error==788L) risultato="ERROR_LOST_WRITEBEHIND_DATA_NETWORK_DISCONNECTED";
	else
	if (i_error==789L) risultato="ERROR_LOST_WRITEBEHIND_DATA_NETWORK_SERVER_ERROR";
	else
	if (i_error==790L) risultato="ERROR_LOST_WRITEBEHIND_DATA_LOCAL_DISK_ERROR";
	else
	if (i_error==791L) risultato="ERROR_BAD_MCFG_TABLE";
	else
	if (i_error==792L) risultato="ERROR_DISK_REPAIR_REDIRECTED";
	else
	if (i_error==793L) risultato="ERROR_DISK_REPAIR_UNSUCCESSFUL";
	else
	if (i_error==794L) risultato="ERROR_CORRUPT_LOG_OVERFULL";
	else
	if (i_error==795L) risultato="ERROR_CORRUPT_LOG_CORRUPTED";
	else
	if (i_error==796L) risultato="ERROR_CORRUPT_LOG_UNAVAILABLE";
	else
	if (i_error==797L) risultato="ERROR_CORRUPT_LOG_DELETED_FULL";
	else
	if (i_error==798L) risultato="ERROR_CORRUPT_LOG_CLEARED";
	else
	if (i_error==799L) risultato="ERROR_ORPHAN_NAME_EXHAUSTED";
	else
	if (i_error==800L) risultato="ERROR_OPLOCK_SWITCHED_TO_NEW_HANDLE";
	else
	if (i_error==801L) risultato="ERROR_CANNOT_GRANT_REQUESTED_OPLOCK";
	else
	if (i_error==802L) risultato="ERROR_CANNOT_BREAK_OPLOCK";
	else
	if (i_error==803L) risultato="ERROR_OPLOCK_HANDLE_CLOSED";
	else
	if (i_error==804L) risultato="ERROR_NO_ACE_CONDITION";
	else
	if (i_error==805L) risultato="ERROR_INVALID_ACE_CONDITION";
	else
	if (i_error==806L) risultato="ERROR_FILE_HANDLE_REVOKED";
	else
	if (i_error==807L) risultato="ERROR_IMAGE_AT_DIFFERENT_BASE";
	else
	if (i_error==808L) risultato="ERROR_ENCRYPTED_IO_NOT_POSSIBLE";
	else
	if (i_error==809L) risultato="ERROR_FILE_METADATA_OPTIMIZATION_IN_PROGRESS";
	else
	if (i_error==810L) risultato="ERROR_QUOTA_ACTIVITY";
	else
	if (i_error==811L) risultato="ERROR_HANDLE_REVOKED";
	else
	if (i_error==812L) risultato="ERROR_CALLBACK_INVOKE_INLINE";
	else
	if (i_error==813L) risultato="ERROR_CPU_SET_INVALID";
	else
	if (i_error==994L) risultato="ERROR_EA_ACCESS_DENIED";
	else
	if (i_error==995L) risultato="ERROR_OPERATION_ABORTED";
	else
	if (i_error==996L) risultato="ERROR_IO_INCOMPLETE";
	else
	if (i_error==997L) risultato="ERROR_IO_PENDING";
	else
	if (i_error==998L) risultato="ERROR_NOACCESS";
	else
	if (i_error==999L) risultato="ERROR_SWAPERROR";
	else
	if (i_error==1001L) risultato="ERROR_STACK_OVERFLOW";
	else
	if (i_error==1002L) risultato="ERROR_INVALID_MESSAGE";
	else
	if (i_error==1003L) risultato="ERROR_CAN_NOT_COMPLETE";
	else
	if (i_error==1004L) risultato="ERROR_INVALID_FLAGS";
	else
	if (i_error==1005L) risultato="ERROR_UNRECOGNIZED_VOLUME";
	else
	if (i_error==1006L) risultato="ERROR_FILE_INVALID";
	else
	if (i_error==1007L) risultato="ERROR_FULLSCREEN_MODE";
	else
	if (i_error==1008L) risultato="ERROR_NO_TOKEN";
	else
	if (i_error==1009L) risultato="ERROR_BADDB";
	else
	if (i_error==1010L) risultato="ERROR_BADKEY";
	else
	if (i_error==1011L) risultato="ERROR_CANTOPEN";
	else
	if (i_error==1012L) risultato="ERROR_CANTREAD";
	else
	if (i_error==1013L) risultato="ERROR_CANTWRITE";
	else
	if (i_error==1014L) risultato="ERROR_REGISTRY_RECOVERED";
	else
	if (i_error==1015L) risultato="ERROR_REGISTRY_CORRUPT";
	else
	if (i_error==1016L) risultato="ERROR_REGISTRY_IO_FAILED";
	else
	if (i_error==1017L) risultato="ERROR_NOT_REGISTRY_FILE";
	else
	if (i_error==1018L) risultato="ERROR_KEY_DELETED";
	else
	if (i_error==1019L) risultato="ERROR_NO_LOG_SPACE";
	else
	if (i_error==1020L) risultato="ERROR_KEY_HAS_CHILDREN";
	else
	if (i_error==1021L) risultato="ERROR_CHILD_MUST_BE_VOLATILE";
	else
	if (i_error==1022L) risultato="ERROR_NOTIFY_ENUM_DIR";
	else
	if (i_error==1051L) risultato="ERROR_DEPENDENT_SERVICES_RUNNING";
	else
	if (i_error==1052L) risultato="ERROR_INVALID_SERVICE_CONTROL";
	else
	if (i_error==1053L) risultato="ERROR_SERVICE_REQUEST_TIMEOUT";
	else
	if (i_error==1054L) risultato="ERROR_SERVICE_NO_THREAD";
	else
	if (i_error==1055L) risultato="ERROR_SERVICE_DATABASE_LOCKED";
	else
	if (i_error==1056L) risultato="ERROR_SERVICE_ALREADY_RUNNING";
	else
	if (i_error==1057L) risultato="ERROR_INVALID_SERVICE_ACCOUNT";
	else
	if (i_error==1058L) risultato="ERROR_SERVICE_DISABLED";
	else
	if (i_error==1059L) risultato="ERROR_CIRCULAR_DEPENDENCY";
	else
	if (i_error==1060L) risultato="ERROR_SERVICE_DOES_NOT_EXIST";
	else
	if (i_error==1061L) risultato="ERROR_SERVICE_CANNOT_ACCEPT_CTRL";
	else
	if (i_error==1062L) risultato="ERROR_SERVICE_NOT_ACTIVE";
	else
	if (i_error==1063L) risultato="ERROR_FAILED_SERVICE_CONTROLLER_CONNECT";
	else
	if (i_error==1064L) risultato="ERROR_EXCEPTION_IN_SERVICE";
	else
	if (i_error==1065L) risultato="ERROR_DATABASE_DOES_NOT_EXIST";
	else
	if (i_error==1066L) risultato="ERROR_SERVICE_SPECIFIC_ERROR";
	else
	if (i_error==1067L) risultato="ERROR_PROCESS_ABORTED";
	else
	if (i_error==1068L) risultato="ERROR_SERVICE_DEPENDENCY_FAIL";
	else
	if (i_error==1069L) risultato="ERROR_SERVICE_LOGON_FAILED";
	else
	if (i_error==1070L) risultato="ERROR_SERVICE_START_HANG";
	else
	if (i_error==1071L) risultato="ERROR_INVALID_SERVICE_LOCK";
	else
	if (i_error==1072L) risultato="ERROR_SERVICE_MARKED_FOR_DELETE";
	else
	if (i_error==1073L) risultato="ERROR_SERVICE_EXISTS";
	else
	if (i_error==1074L) risultato="ERROR_ALREADY_RUNNING_LKG";
	else
	if (i_error==1075L) risultato="ERROR_SERVICE_DEPENDENCY_DELETED";
	else
	if (i_error==1076L) risultato="ERROR_BOOT_ALREADY_ACCEPTED";
	else
	if (i_error==1077L) risultato="ERROR_SERVICE_NEVER_STARTED";
	else
	if (i_error==1078L) risultato="ERROR_DUPLICATE_SERVICE_NAME";
	else
	if (i_error==1079L) risultato="ERROR_DIFFERENT_SERVICE_ACCOUNT";
	else
	if (i_error==1080L) risultato="ERROR_CANNOT_DETECT_DRIVER_FAILURE";
	else
	if (i_error==1081L) risultato="ERROR_CANNOT_DETECT_PROCESS_ABORT";
	else
	if (i_error==1082L) risultato="ERROR_NO_RECOVERY_PROGRAM";
	else
	if (i_error==1083L) risultato="ERROR_SERVICE_NOT_IN_EXE";
	else
	if (i_error==1084L) risultato="ERROR_NOT_SAFEBOOT_SERVICE";
	else
	if (i_error==1100L) risultato="ERROR_END_OF_MEDIA";
	else
	if (i_error==1101L) risultato="ERROR_FILEMARK_DETECTED";
	else
	if (i_error==1102L) risultato="ERROR_BEGINNING_OF_MEDIA";
	else
	if (i_error==1103L) risultato="ERROR_SETMARK_DETECTED";
	else
	if (i_error==1104L) risultato="ERROR_NO_DATA_DETECTED";
	else
	if (i_error==1105L) risultato="ERROR_PARTITION_FAILURE";
	else
	if (i_error==1106L) risultato="ERROR_INVALID_BLOCK_LENGTH";
	else
	if (i_error==1107L) risultato="ERROR_DEVICE_NOT_PARTITIONED";
	else
	if (i_error==1108L) risultato="ERROR_UNABLE_TO_LOCK_MEDIA";
	else
	if (i_error==1109L) risultato="ERROR_UNABLE_TO_UNLOAD_MEDIA";
	else
	if (i_error==1110L) risultato="ERROR_MEDIA_CHANGED";
	else
	if (i_error==1111L) risultato="ERROR_BUS_RESET";
	else
	if (i_error==1112L) risultato="ERROR_NO_MEDIA_IN_DRIVE";
	else
	if (i_error==1113L) risultato="ERROR_NO_UNICODE_TRANSLATION";
	else
	if (i_error==1114L) risultato="ERROR_DLL_INIT_FAILED";
	else
	if (i_error==1115L) risultato="ERROR_SHUTDOWN_IN_PROGRESS";
	else
	if (i_error==1116L) risultato="ERROR_NO_SHUTDOWN_IN_PROGRESS";
	else
	if (i_error==1117L) risultato="ERROR_IO_DEVICE";
	else
	if (i_error==1118L) risultato="ERROR_SERIAL_NO_DEVICE";
	else
	if (i_error==1119L) risultato="ERROR_IRQ_BUSY";
	else
	if (i_error==1120L) risultato="ERROR_MORE_WRITES";
	else
	if (i_error==1121L) risultato="ERROR_COUNTER_TIMEOUT";
	else
	if (i_error==1122L) risultato="ERROR_FLOPPY_ID_MARK_NOT_FOUND";
	else
	if (i_error==1123L) risultato="ERROR_FLOPPY_WRONG_CYLINDER";
	else
	if (i_error==1124L) risultato="ERROR_FLOPPY_UNKNOWN_ERROR";
	else
	if (i_error==1125L) risultato="ERROR_FLOPPY_BAD_REGISTERS";
	else
	if (i_error==1126L) risultato="ERROR_DISK_RECALIBRATE_FAILED";
	else
	if (i_error==1127L) risultato="ERROR_DISK_OPERATION_FAILED";
	else
	if (i_error==1128L) risultato="ERROR_DISK_RESET_FAILED";
	else
	if (i_error==1129L) risultato="ERROR_EOM_OVERFLOW";
	else
	if (i_error==1130L) risultato="ERROR_NOT_ENOUGH_SERVER_MEMORY";
	else
	if (i_error==1131L) risultato="ERROR_POSSIBLE_DEADLOCK";
	else
	if (i_error==1132L) risultato="ERROR_MAPPED_ALIGNMENT";
	else
	if (i_error==1140L) risultato="ERROR_SET_POWER_STATE_VETOED";
	else
	if (i_error==1141L) risultato="ERROR_SET_POWER_STATE_FAILED";
	else
	if (i_error==1142L) risultato="ERROR_TOO_MANY_LINKS";
	else
	if (i_error==1150L) risultato="ERROR_OLD_WIN_VERSION";
	else
	if (i_error==1151L) risultato="ERROR_APP_WRONG_OS";
	else
	if (i_error==1152L) risultato="ERROR_SINGLE_INSTANCE_APP";
	else
	if (i_error==1153L) risultato="ERROR_RMODE_APP";
	else
	if (i_error==1154L) risultato="ERROR_INVALID_DLL";
	else
	if (i_error==1155L) risultato="ERROR_NO_ASSOCIATION";
	else
	if (i_error==1156L) risultato="ERROR_DDE_FAIL";
	else
	if (i_error==1157L) risultato="ERROR_DLL_NOT_FOUND";
	else
	if (i_error==1158L) risultato="ERROR_NO_MORE_USER_HANDLES";
	else
	if (i_error==1159L) risultato="ERROR_MESSAGE_SYNC_ONLY";
	else
	if (i_error==1160L) risultato="ERROR_SOURCE_ELEMENT_EMPTY";
	else
	if (i_error==1161L) risultato="ERROR_DESTINATION_ELEMENT_FULL";
	else
	if (i_error==1162L) risultato="ERROR_ILLEGAL_ELEMENT_ADDRESS";
	else
	if (i_error==1163L) risultato="ERROR_MAGAZINE_NOT_PRESENT";
	else
	if (i_error==1164L) risultato="ERROR_DEVICE_REINITIALIZATION_NEEDED";
	else
	if (i_error==1165L) risultato="ERROR_DEVICE_REQUIRES_CLEANING";
	else
	if (i_error==1166L) risultato="ERROR_DEVICE_DOOR_OPEN";
	else
	if (i_error==1167L) risultato="ERROR_DEVICE_NOT_CONNECTED";
	else
	if (i_error==1168L) risultato="ERROR_NOT_FOUND";
	else
	if (i_error==1169L) risultato="ERROR_NO_MATCH";
	else
	if (i_error==1170L) risultato="ERROR_SET_NOT_FOUND";
	else
	if (i_error==1171L) risultato="ERROR_POINT_NOT_FOUND";
	else
	if (i_error==1172L) risultato="ERROR_NO_TRACKING_SERVICE";
	else
	if (i_error==1173L) risultato="ERROR_NO_VOLUME_ID";
	else
	if (i_error==1175L) risultato="ERROR_UNABLE_TO_REMOVE_REPLACED";
	else
	if (i_error==1176L) risultato="ERROR_UNABLE_TO_MOVE_REPLACEMENT";
	else
	if (i_error==1177L) risultato="ERROR_UNABLE_TO_MOVE_REPLACEMENT_2";
	else
	if (i_error==1178L) risultato="ERROR_JOURNAL_DELETE_IN_PROGRESS";
	else
	if (i_error==1179L) risultato="ERROR_JOURNAL_NOT_ACTIVE";
	else
	if (i_error==1180L) risultato="ERROR_POTENTIAL_FILE_FOUND";
	else
	if (i_error==1181L) risultato="ERROR_JOURNAL_ENTRY_DELETED";
	else
	if (i_error==1190L) risultato="ERROR_SHUTDOWN_IS_SCHEDULED";
	else
	if (i_error==1191L) risultato="ERROR_SHUTDOWN_USERS_LOGGED_ON";
	else
	if (i_error==1200L) risultato="ERROR_BAD_DEVICE";
	else
	if (i_error==1201L) risultato="ERROR_CONNECTION_UNAVAIL";
	else
	if (i_error==1202L) risultato="ERROR_DEVICE_ALREADY_REMEMBERED";
	else
	if (i_error==1203L) risultato="ERROR_NO_NET_OR_BAD_PATH";
	else
	if (i_error==1204L) risultato="ERROR_BAD_PROVIDER";
	else
	if (i_error==1205L) risultato="ERROR_CANNOT_OPEN_PROFILE";
	else
	if (i_error==1206L) risultato="ERROR_BAD_PROFILE";
	else
	if (i_error==1207L) risultato="ERROR_NOT_CONTAINER";
	else
	if (i_error==1208L) risultato="ERROR_EXTENDED_ERROR";
	else
	if (i_error==1209L) risultato="ERROR_INVALID_GROUPNAME";
	else
	if (i_error==1210L) risultato="ERROR_INVALID_COMPUTERNAME";
	else
	if (i_error==1211L) risultato="ERROR_INVALID_EVENTNAME";
	else
	if (i_error==1212L) risultato="ERROR_INVALID_DOMAINNAME";
	else
	if (i_error==1213L) risultato="ERROR_INVALID_SERVICENAME";
	else
	if (i_error==1214L) risultato="ERROR_INVALID_NETNAME";
	else
	if (i_error==1215L) risultato="ERROR_INVALID_SHARENAME";
	else
	if (i_error==1216L) risultato="ERROR_INVALID_PASSWORDNAME";
	else
	if (i_error==1217L) risultato="ERROR_INVALID_MESSAGENAME";
	else
	if (i_error==1218L) risultato="ERROR_INVALID_MESSAGEDEST";
	else
	if (i_error==1219L) risultato="ERROR_SESSION_CREDENTIAL_CONFLICT";
	else
	if (i_error==1220L) risultato="ERROR_REMOTE_SESSION_LIMIT_EXCEEDED";
	else
	if (i_error==1221L) risultato="ERROR_DUP_DOMAINNAME";
	else
	if (i_error==1222L) risultato="ERROR_NO_NETWORK";
	else
	if (i_error==1223L) risultato="ERROR_CANCELLED";
	else
	if (i_error==1224L) risultato="ERROR_USER_MAPPED_FILE";
	else
	if (i_error==1225L) risultato="ERROR_CONNECTION_REFUSED";
	else
	if (i_error==1226L) risultato="ERROR_GRACEFUL_DISCONNECT";
	else
	if (i_error==1227L) risultato="ERROR_ADDRESS_ALREADY_ASSOCIATED";
	else
	if (i_error==1228L) risultato="ERROR_ADDRESS_NOT_ASSOCIATED";
	else
	if (i_error==1229L) risultato="ERROR_CONNECTION_INVALID";
	else
	if (i_error==1230L) risultato="ERROR_CONNECTION_ACTIVE";
	else
	if (i_error==1231L) risultato="ERROR_NETWORK_UNREACHABLE";
	else
	if (i_error==1232L) risultato="ERROR_HOST_UNREACHABLE";
	else
	if (i_error==1233L) risultato="ERROR_PROTOCOL_UNREACHABLE";
	else
	if (i_error==1234L) risultato="ERROR_PORT_UNREACHABLE";
	else
	if (i_error==1235L) risultato="ERROR_REQUEST_ABORTED";
	else
	if (i_error==1236L) risultato="ERROR_CONNECTION_ABORTED";
	else
	if (i_error==1237L) risultato="ERROR_RETRY";
	else
	if (i_error==1238L) risultato="ERROR_CONNECTION_COUNT_LIMIT";
	else
	if (i_error==1239L) risultato="ERROR_LOGIN_TIME_RESTRICTION";
	else
	if (i_error==1240L) risultato="ERROR_LOGIN_WKSTA_RESTRICTION";
	else
	if (i_error==1241L) risultato="ERROR_INCORRECT_ADDRESS";
	else
	if (i_error==1242L) risultato="ERROR_ALREADY_REGISTERED";
	else
	if (i_error==1243L) risultato="ERROR_SERVICE_NOT_FOUND";
	else
	if (i_error==1244L) risultato="ERROR_NOT_AUTHENTICATED";
	else
	if (i_error==1245L) risultato="ERROR_NOT_LOGGED_ON";
	else
	if (i_error==1246L) risultato="ERROR_CONTINUE";
	else
	if (i_error==1247L) risultato="ERROR_ALREADY_INITIALIZED";
	else
	if (i_error==1248L) risultato="ERROR_NO_MORE_DEVICES";
	else
	if (i_error==1249L) risultato="ERROR_NO_SUCH_SITE";
	else
	if (i_error==1250L) risultato="ERROR_DOMAIN_CONTROLLER_EXISTS";
	else
	if (i_error==1251L) risultato="ERROR_ONLY_IF_CONNECTED";
	else
	if (i_error==1252L) risultato="ERROR_OVERRIDE_NOCHANGES";
	else
	if (i_error==1253L) risultato="ERROR_BAD_USER_PROFILE";
	else
	if (i_error==1254L) risultato="ERROR_NOT_SUPPORTED_ON_SBS";
	else
	if (i_error==1255L) risultato="ERROR_SERVER_SHUTDOWN_IN_PROGRESS";
	else
	if (i_error==1256L) risultato="ERROR_HOST_DOWN";
	else
	if (i_error==1257L) risultato="ERROR_NON_ACCOUNT_SID";
	else
	if (i_error==1258L) risultato="ERROR_NON_DOMAIN_SID";
	else
	if (i_error==1259L) risultato="ERROR_APPHELP_BLOCK";
	else
	if (i_error==1260L) risultato="ERROR_ACCESS_DISABLED_BY_POLICY";
	else
	if (i_error==1261L) risultato="ERROR_REG_NAT_CONSUMPTION";
	else
	if (i_error==1262L) risultato="ERROR_CSCSHARE_OFFLINE";
	else
	if (i_error==1263L) risultato="ERROR_PKINIT_FAILURE";
	else
	if (i_error==1264L) risultato="ERROR_SMARTCARD_SUBSYSTEM_FAILURE";
	else
	if (i_error==1265L) risultato="ERROR_DOWNGRADE_DETECTED";
	else
	if (i_error==1271L) risultato="ERROR_MACHINE_LOCKED";
	else
	if (i_error==1272L) risultato="ERROR_SMB_GUEST_LOGON_BLOCKED";
	else
	if (i_error==1273L) risultato="ERROR_CALLBACK_SUPPLIED_INVALID_DATA";
	else
	if (i_error==1274L) risultato="ERROR_SYNC_FOREGROUND_REFRESH_REQUIRED";
	else
	if (i_error==1275L) risultato="ERROR_DRIVER_BLOCKED";
	else
	if (i_error==1276L) risultato="ERROR_INVALID_IMPORT_OF_NON_DLL";
	else
	if (i_error==1277L) risultato="ERROR_ACCESS_DISABLED_WEBBLADE";
	else
	if (i_error==1278L) risultato="ERROR_ACCESS_DISABLED_WEBBLADE_TAMPER";
	else
	if (i_error==1279L) risultato="ERROR_RECOVERY_FAILURE";
	else
	if (i_error==1280L) risultato="ERROR_ALREADY_FIBER";
	else
	if (i_error==1281L) risultato="ERROR_ALREADY_THREAD";
	else
	if (i_error==1282L) risultato="ERROR_STACK_BUFFER_OVERRUN";
	else
	if (i_error==1283L) risultato="ERROR_PARAMETER_QUOTA_EXCEEDED";
	else
	if (i_error==1284L) risultato="ERROR_DEBUGGER_INACTIVE";
	else
	if (i_error==1285L) risultato="ERROR_DELAY_LOAD_FAILED";
	else
	if (i_error==1286L) risultato="ERROR_VDM_DISALLOWED";
	else
	if (i_error==1287L) risultato="ERROR_UNIDENTIFIED_ERROR";
	else
	if (i_error==1288L) risultato="ERROR_INVALID_CRUNTIME_PARAMETER";
	else
	if (i_error==1289L) risultato="ERROR_BEYOND_VDL";
	else
	if (i_error==1290L) risultato="ERROR_INCOMPATIBLE_SERVICE_SID_TYPE";
	else
	if (i_error==1291L) risultato="ERROR_DRIVER_PROCESS_TERMINATED";
	else
	if (i_error==1292L) risultato="ERROR_IMPLEMENTATION_LIMIT";
	else
	if (i_error==1293L) risultato="ERROR_PROCESS_IS_PROTECTED";
	else
	if (i_error==1294L) risultato="ERROR_SERVICE_NOTIFY_CLIENT_LAGGING";
	else
	if (i_error==1295L) risultato="ERROR_DISK_QUOTA_EXCEEDED";
	else
	if (i_error==1296L) risultato="ERROR_CONTENT_BLOCKED";
	else
	if (i_error==1297L) risultato="ERROR_INCOMPATIBLE_SERVICE_PRIVILEGE";
	else
	if (i_error==1298L) risultato="ERROR_APP_HANG";
	else
	if (i_error==1299L) risultato="ERROR_INVALID_LABEL";
	else
	if (i_error==1300L) risultato="ERROR_NOT_ALL_ASSIGNED";
	else
	if (i_error==1301L) risultato="ERROR_SOME_NOT_MAPPED";
	else
	if (i_error==1302L) risultato="ERROR_NO_QUOTAS_FOR_ACCOUNT";
	else
	if (i_error==1303L) risultato="ERROR_LOCAL_USER_SESSION_KEY";
	else
	if (i_error==1304L) risultato="ERROR_NULL_LM_PASSWORD";
	else
	if (i_error==1305L) risultato="ERROR_UNKNOWN_REVISION";
	else
	if (i_error==1306L) risultato="ERROR_REVISION_MISMATCH