a2223c7302
doesn't depend on the 'comp' set. ok espie@ deraadt@
353 lines
15 KiB
C
353 lines
15 KiB
C
/*
|
|
* (c) Thomas Pornin 1999 - 2002
|
|
* (c) Louis P. Santillan 2011
|
|
* This file is derived from tune.h
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 4. The name of the authors may not be used to endorse or promote
|
|
* products derived from this software without specific prior written
|
|
* permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
|
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
*/
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* The LOW_MEM macro triggers the use of macro storage which uses less
|
|
* memory. It actually also improves performance on large, modern machines
|
|
* (due to less cache pressure). This option implies no limitation (except
|
|
* on the number of arguments a macro may take, which is then limited to 32766)
|
|
* so it is on by default. Non-LOW_MEM code is considered deprecated.
|
|
*/
|
|
#define LOW_MEM
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* Define AMIGA for systems using "drive letters" at the beginning of
|
|
* some paths; define MSDOS on systems with drive letters and using
|
|
* backslashes to separate directory components.
|
|
*/
|
|
/* #define AMIGA */
|
|
/* #define MSDOS */
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* Define this if your compiler does not know the strftime() function;
|
|
* TurboC 2.01 under Msdos does not know strftime().
|
|
*/
|
|
/* #define NOSTRFTIME */
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* Buffering: there are two levels of buffering on input and output streams:
|
|
* the standard libc buffering (manageable with setbuf() and setvbuf())
|
|
* and some buffering provided by ucpp itself. The ucpp buffering uses
|
|
* two buffers, of size respectively INPUT_BUF_MEMG and OUTPUT_BUF_MEMG
|
|
* (as defined below).
|
|
* You can disable one or both of these bufferings by defining the macros
|
|
* NO_LIBC_BUF and NO_UCPP_BUF.
|
|
*/
|
|
/* #define NO_LIBC_BUF */
|
|
/* #define NO_UCPP_BUF */
|
|
|
|
/*
|
|
* On Unix stations, the system call mmap() might be used on input files.
|
|
* This option is a subclause of ucpp internal buffering. On one station,
|
|
* a 10% speed improvement was observed. Do not define this unless the
|
|
* host architecture has the following characteristics:
|
|
* -- Posix / Single Unix compliance
|
|
* -- Text files correspond one to one with memory representation
|
|
* If a file is not seekable or not mmapable, ucpp will revert to the
|
|
* standard fread() solution.
|
|
*
|
|
* This feature is still considered beta quality. On some systems where
|
|
* files can be bigger than memory address space (mainly, 32-bit systems
|
|
* with files bigger than 4 GB), this option makes ucpp fail to operate
|
|
* on those extremely large files.
|
|
*/
|
|
#define UCPP_MMAP
|
|
|
|
/*
|
|
* Performance issues:
|
|
* -- On memory-starved systems, such as Minix-i86, do not use ucpp
|
|
* buffering; keep only libc buffering.
|
|
* -- If you do not use libc buffering, activate the UCPP_MMAP option.
|
|
* Note that the UCPP_MMAP option is ignored if ucpp buffering is not
|
|
* activated.
|
|
*
|
|
* On an Athlon 1200 running FreeBSD 4.7, the best performances are
|
|
* achieved when libc buffering is activated and/or UCPP_MMAP is on.
|
|
*/
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* Define this if you want ucpp to generate tokenized PRAGMA tokens;
|
|
* otherwise, it will generate raw string contents. This setting is
|
|
* irrelevant to the stand-alone version of ucpp.
|
|
*/
|
|
#define PRAGMA_TOKENIZE
|
|
|
|
/*
|
|
* Define this to the special character that marks the end of tokens with
|
|
* a string value inside a tokenized PRAGMA token. The #pragma and _Pragma()
|
|
* directives which use this character will be a bit more difficult to
|
|
* decode (but ucpp will not mind). 0 cannot be used. '\n' is fine because
|
|
* it cannot appear inside a #pragma or _Pragma(), since newlines cannot be
|
|
* embedded inside tokens, neither directly nor by macro substitution and
|
|
* stringization. Besides, '\n' is portable.
|
|
*/
|
|
#define PRAGMA_TOKEN_END ((unsigned char)'\n')
|
|
|
|
/*
|
|
* Define this if you want ucpp to include encountered #pragma directives
|
|
* in its output in non-lexer mode; _Pragma() are translated to equivalent
|
|
* #pragma directives.
|
|
*/
|
|
#define PRAGMA_DUMP
|
|
|
|
/*
|
|
* According to my interpretation of the C99 standard, _Pragma() are
|
|
* evaluated wherever macro expansion could take place. However, Neil Booth,
|
|
* whose mother language is English (contrary to me) and who is well aware
|
|
* of the C99 standard (and especially the C preprocessor) told me that
|
|
* it was unclear whether _Pragma() are evaluated inside directives such
|
|
* as #if, #include and #line. If you want to disable the evaluation of
|
|
* _Pragma() inside such directives, define the following macro.
|
|
*/
|
|
/* #define NO_PRAGMA_IN_DIRECTIVE */
|
|
|
|
/*
|
|
* The C99 standard mandates that the operator `##' must yield a single,
|
|
* valid token, lest undefined behaviour befall upon thy head. Hence,
|
|
* for instance, `+ ## +=' is forbidden, because `++=' is not a valid
|
|
* token (although it is a valid list of two tokens, `++' and `=').
|
|
* However, ucpp only emits a warning for such sin, and unmerges the
|
|
* tokens (thus emitting `+' then `+=' for that example). When ucpp
|
|
* produces text output, those two tokens will be separated by a space
|
|
* character so that the basic rule of text output is preserved: when
|
|
* parsed again, text output yields the exact same stream of tokens.
|
|
* That extra space is virtual: it does not count as a true whitespace
|
|
* token for stringization.
|
|
*
|
|
* However, it might be desirable, for some uses other than preprocessing
|
|
* C source code, not to emit that extra space at all. To make ucpp behave
|
|
* that way, define the DSHARP_TOKEN_MERGE macro. Please note that this
|
|
* can trigger spurious token merging. For instance, with that macro
|
|
* activated, `+ ## +=' will be output as `++=' which, if preprocessed
|
|
* again, will read as `++' followed by `='.
|
|
*
|
|
* All this is irrelevant to lexer mode; and trying to merge incompatible
|
|
* tokens is a shooting offence, anyway.
|
|
*/
|
|
/* #define DSHARP_TOKEN_MERGE */
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* Define INMACRO_FLAG to include two flags to the structure lexer_state,
|
|
* that tell whether tokens come from a macro-replacement, and count those
|
|
* macro-replacements.
|
|
*/
|
|
/* #define INMACRO_FLAG */
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* Paths where files are looked for by default, when #include is used.
|
|
* Typical path is /usr/local/include and /usr/include, in that order.
|
|
* If you want to set up no path, define the macro to 0.
|
|
*
|
|
* For Linux, get gcc includes too, or you will miss things like stddef.h.
|
|
* The exact path varies much, depending on the distribution.
|
|
*/
|
|
#define STD_INCLUDE_PATH "/usr/local/include", "/usr/include"
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* Arithmetic code for evaluation of #if expressions. Evaluation
|
|
* uses either a native machine type, or an emulated two's complement
|
|
* type. Division by 0 and overflow on division are considered as errors
|
|
* and reported as such. If ARITHMETIC_CHECKS is defined, all other
|
|
* operations that imply undefined or implementation-defined behaviour
|
|
* are reported as warnings but otherwise performed nonetheless.
|
|
*
|
|
* For native type evaluation, the following macros should be defined:
|
|
* NATIVE_SIGNED the native signed type
|
|
* NATIVE_UNSIGNED the native corresponding unsigned type
|
|
* NATIVE_UNSIGNED_BITS the native unsigned type width, in bits
|
|
* NATIVE_SIGNED_MIN the native signed type minimum value
|
|
* NATIVE_SIGNED_MAX the native signed type maximum value
|
|
*
|
|
* The code in the arith.c file performs some tricky detection
|
|
* operations on the native type representation and possible existence
|
|
* of a trap representation. These operations assume a C99-compliant
|
|
* compiler; on a C90-only compiler, the operations are valid but may
|
|
* yield incorrect results. You may force those settings with some
|
|
* more macros: see the comments in arith.c (look for "ARCH_DEFINED").
|
|
* Remember that this is mostly a non-issue, unless you are building
|
|
* ucpp with a pre-C99 cross-compiler and either the host or target
|
|
* architecture uses a non-two's complement representation of signed
|
|
* integers. Such a combination is pretty rare nowadays, so the best
|
|
* you can do is forget this paragraph completely and live in peace.
|
|
*
|
|
*
|
|
* If you do not have a handy native type (for instance, you compile ucpp
|
|
* with a C90 compiler which lacks the "long long" type, or you compile
|
|
* ucpp for a cross-compiler which should support an evaluation integer
|
|
* type of a size that is not available on the host machine), you may use
|
|
* a simulated type. The type uses two's complement representation and
|
|
* may have any width from 2 bits to twice the underlying native type
|
|
* width, inclusive (odd widths are allowed). To use an emulated type,
|
|
* make sure that NATIVE_SIGNED is not defined, and define the following
|
|
* macros:
|
|
* SIMUL_ARITH_SUBTYPE the native underlying type to use
|
|
* SIMUL_SUBTYPE_BITS the native underlying type width
|
|
* SIMUL_NUMBITS the emulated type width
|
|
*
|
|
* Undefined and implementation-defined behaviours are warned upon, if
|
|
* ARITHMETIC_CHECKS is defined. Results are truncated to the type
|
|
* width; shift count for the << and >> operators is reduced modulo the
|
|
* emulated type width; right shifting of a signed negative value performs
|
|
* sign extension (the result is left-padded with bits set to 1).
|
|
*/
|
|
|
|
/*
|
|
* For native type evaluation with a 64-bit "long long" type.
|
|
*/
|
|
/*
 * Native integer type pair used for the evaluation of #if expressions,
 * together with its width and its value range.
 */
#define NATIVE_SIGNED long long
|
|
#define NATIVE_UNSIGNED unsigned long long
|
|
/*
 * Width of NATIVE_UNSIGNED, in bits.
 * NOTE(review): C only guarantees that "unsigned long long" is at least
 * 64 bits wide; this value assumes exactly 64 -- confirm for unusual hosts.
 */
#define NATIVE_UNSIGNED_BITS 64
|
|
/*
 * Written as (-MAX - 1) because the magnitude 9223372036854775808 is not
 * representable as a "long long" constant, so the minimum cannot be
 * spelled as a single negated literal.
 */
#define NATIVE_SIGNED_MIN (-9223372036854775807LL - 1)
|
|
#define NATIVE_SIGNED_MAX 9223372036854775807LL
|
|
|
|
/*
|
|
* For emulation of a 64-bit type using a native 32-bit "unsigned long"
|
|
* type.
|
|
#undef NATIVE_SIGNED
|
|
#define SIMUL_ARITH_SUBTYPE unsigned long
|
|
#define SIMUL_SUBTYPE_BITS 32
|
|
#define SIMUL_NUMBITS 64
|
|
*/
|
|
|
|
/*
|
|
* Comment out the following line if you want to deactivate arithmetic
|
|
* checks (warnings upon undefined and implementation-defined
|
|
* behaviour). Arithmetic checks slow down arithmetic operations a bit,
|
|
* especially multiplications, but this should not be an issue with
|
|
* typical C source code.
|
|
*/
|
|
#define ARITHMETIC_CHECKS
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* To force signedness of wide character constants, define WCHAR_SIGNEDNESS
|
|
* to 0 for unsigned, 1 for signed. By default, wide character constants
|
|
* are signed if the native `char' type is signed, and unsigned otherwise.
|
|
#define WCHAR_SIGNEDNESS 0
|
|
*/
|
|
|
|
/*
|
|
* Standard assertions. They should include one cpu() assertion, one machine()
|
|
* assertion (identical to cpu()), and one or more system() assertions.
|
|
*
|
|
* for Linux/PC: cpu(i386), machine(i386), system(unix), system(linux)
|
|
* for Linux/Alpha: cpu(alpha), machine(alpha), system(unix), system(linux)
|
|
* for Sparc/Solaris: cpu(sparc), machine(sparc), system(unix), system(solaris)
|
|
*
|
|
* These are only suggestions. On Solaris, machine() should be defined
|
|
* for i386 or sparc (standard system headers use such an assertion). For
|
|
* cross-compilation, define assertions related to the target architecture.
|
|
*
|
|
* If you want no standard assertion, define STD_ASSERT to 0.
|
|
*/
|
|
#define STD_ASSERT 0
|
|
/*
|
|
#define STD_ASSERT "cpu(i386)", "machine(i386)", "system(unix)", \
|
|
"system(freebsd)"
|
|
*/
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* System predefined macros. Nothing really mandatory, but some programs
|
|
* might rely on those.
|
|
* Each string must be either "name" or "name=token-list". If you want
|
|
* no predefined macro, define STD_MACROS to 0.
|
|
*/
|
|
#define STD_MACROS 0
|
|
/*
|
|
#define STD_MACROS "__FreeBSD=4", "__unix", "__i386", \
|
|
"__FreeBSD__=4", "__unix__", "__i386__"
|
|
*/
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* Default flags; HANDLE_ASSERTIONS is required for Solaris system headers.
|
|
* See cpp.h for the definition of these flags.
|
|
*/
|
|
/*
 * Default flag set: comment discarding, standard and pragma warnings,
 * variadic macros, C++-style comments, line numbering, text output,
 * trigraph and assertion handling.
 * NOTE(review): presumably the defaults for plain text-output (non-lexer)
 * preprocessing, since TEXT_OUTPUT is set and LEXER is not -- confirm
 * against the flag definitions in cpp.h.
 */
#define DEFAULT_CPP_FLAGS (DISCARD_COMMENTS | WARN_STANDARD \
|
|
| WARN_PRAGMA | FAIL_SHARP | MACRO_VAARG \
|
|
| CPLUSPLUS_COMMENTS | LINE_NUM | TEXT_OUTPUT \
|
|
| KEEP_OUTPUT | HANDLE_TRIGRAPHS \
|
|
| HANDLE_ASSERTIONS)
|
|
/*
 * Default flag set that includes LEXER. Compared with DEFAULT_CPP_FLAGS,
 * it omits WARN_PRAGMA, LINE_NUM, TEXT_OUTPUT and KEEP_OUTPUT, and adds
 * LEXER; all other flags are the same.
 */
#define DEFAULT_LEXER_FLAGS (DISCARD_COMMENTS | WARN_STANDARD | FAIL_SHARP \
|
|
| MACRO_VAARG | CPLUSPLUS_COMMENTS | LEXER \
|
|
| HANDLE_TRIGRAPHS | HANDLE_ASSERTIONS)
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* Define this to use sigsetjmp()/siglongjmp() instead of setjmp()/longjmp().
|
|
* This is non-ANSI, but it improves performance on some POSIX systems.
|
|
* On typical C source code, such improvement is completely negligible.
|
|
*/
|
|
/* #define POSIX_JMP */
|
|
|
|
/* ====================================================================== */
|
|
/*
|
|
* Maximum value (plus one) of a character handled by the lexer; 128 is
|
|
* alright for ASCII native source code, but 256 is needed for EBCDIC.
|
|
* 256 is safe in both cases; you will have big problems if you set
|
|
* this value to INT_MAX or above. On Minix-i86 or Msdos (small memory
|
|
* model), define MAX_CHAR_VAL to 128.
|
|
*
|
|
* Set MAX_CHAR_VAL to a power of two to increase lexing speed. Beware
|
|
* that lexer.c defines a static array of size MSTATE * MAX_CHAR_VAL
|
|
* values of type int (MSTATE is defined in lexer.c and is about 40).
|
|
*/
|
|
/*
 * 128 is the ASCII-only setting.
 * NOTE(review): the example value 160 given for UNBREAKABLE_SPACE is not
 * below this limit; presumably MAX_CHAR_VAL must be raised to 256 before
 * enabling that option -- confirm against lexer.c.
 */
#define MAX_CHAR_VAL 128
|
|
|
|
/*
|
|
* If you want some extra character to be considered as whitespace,
|
|
* define this macro to that character's code. On ISO-8859-1 machines, 160 is
|
|
* the code for the unbreakable space.
|
|
*/
|
|
/* #define UNBREAKABLE_SPACE 160 */
|
|
|
|
/*
|
|
* If you want whitespace tokens contents to be recorded (making them
|
|
* tokens with a string content), define this. The macro STRING_TOKEN
|
|
* will be adjusted accordingly.
|
|
* Without this option, whitespace tokens are not even returned by the
|
|
* lex() function. This is irrelevant for the non-lexer mode (almost --
|
|
* it might slow ucpp down a bit, and with this option, comments will be
|
|
* kept inside #pragma directives).
|
|
*/
|
|
/* #define SEMPER_FIDELIS */
|
|
|
|
/* End of options overridable by UCPP_CONFIG and config.h */
|