399 lines
11 KiB
C++
399 lines
11 KiB
C++
|
|
|
|
#ifndef COCO_calcEntrySCANNER_H__
|
|
#define COCO_calcEntrySCANNER_H__
|
|
|
|
#include <climits>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <cwchar>
|
|
#include <string>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
|
|
// io.h and fcntl are used to ensure binary read from streams on windows
|
|
#if _MSC_VER >= 1300
|
|
#include <io.h>
|
|
#include <fcntl.h>
|
|
#endif
|
|
|
|
// Select the bounded wide-character printf provided by this toolchain.
// defined(_MSC_VER) is checked first so that compilers which do not define
// the macro never evaluate it (avoids -Wundef diagnostics); the resulting
// selection is the same as before.
#if defined(_MSC_VER) && _MSC_VER >= 1400
#define coco_swprintf swprintf_s
#elif defined(_MSC_VER) && _MSC_VER >= 1300
#define coco_swprintf _snwprintf
#else
// assume every other compiler knows swprintf
#define coco_swprintf swprintf
#endif
|
|
|
|
|
|
//! Largest 16-bit value (0xFFFF). Buffer::EoF is defined as
//! COCO_WCHAR_MAX + 1, i.e. the end-of-file marker is the first value above it.
// NOTE(review): presumably the largest character code the scanner handles -
// confirm against the scanner implementation.
#define COCO_WCHAR_MAX 65535
|
|
|
|
|
|
namespace Foam {
|
|
namespace functionEntries {
|
|
namespace calcEntryInternal {
|
|
|
|
|
|
|
|
// * * * * * * * * * * Wide Character String Routines * * * * * * * * * * * //
|
|
|
|
//
|
|
// string handling, wide character
|
|
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
//! Create a new wide string by copying str
wchar_t* coco_string_create(const wchar_t* str);

//! Create a substring of str starting at index and length characters long
wchar_t* coco_string_create(const wchar_t* str, int index, int length);

//! Create a lowercase string from str
wchar_t* coco_string_create_lower(const wchar_t* str);

//! Create a lowercase substring from str starting at index and length
//! characters long
wchar_t* coco_string_create_lower(const wchar_t* str, int index, int length);

//! Create a string by concatenating str1 and str2
wchar_t* coco_string_create_append(const wchar_t* str1, const wchar_t* str2);

//! Create a string by concatenating a character to the end of str
wchar_t* coco_string_create_append(const wchar_t* str, const wchar_t ch);

//! Free storage and nullify the argument
void coco_string_delete(wchar_t* &str);

//! The length of the str, or 0 if the str is NULL
int coco_string_length(const wchar_t* str);

//! Return true if the str ends with the endstr
bool coco_string_endswith(const wchar_t* str, const wchar_t* endstr);

//! Return the index of the first occurrence of ch.
//! Return -1 if nothing is found.
int coco_string_indexof(const wchar_t* str, const wchar_t ch);

//! Return the index of the last occurrence of ch.
//! Return -1 if nothing is found.
int coco_string_lastindexof(const wchar_t* str, const wchar_t ch);

//! Append str to dest
void coco_string_merge(wchar_t* &dest, const wchar_t* str);

//! Compare strings, return true if they are equal
bool coco_string_equal(const wchar_t* str1, const wchar_t* str2);

//! Compare strings, return 0 if they are equal
int coco_string_compareto(const wchar_t* str1, const wchar_t* str2);

//! Simple string hashing function
int coco_string_hash(const wchar_t* str);
|
|
|
|
//
|
|
// String conversions
|
|
// ~~~~~~~~~~~~~~~~~~
|
|
|
|
//! Convert wide string to double
double coco_string_toDouble(const wchar_t* str);

//! Convert wide string to float
float coco_string_toFloat(const wchar_t* str);
|
|
|
|
//
|
|
// String handling, byte character
|
|
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
//! Create a wide string by copying byte str
wchar_t* coco_string_create(const char* str);

//! Create a wide substring of byte str starting at index and length
//! characters long
wchar_t* coco_string_create(const char* str, int index, int length);

//! Create a byte string by copying str
char* coco_string_create_char(const wchar_t* str);

//! Create a byte substring of str starting at index and length characters long
char* coco_string_create_char(const wchar_t* str, int index, int length);

//! Free storage and nullify the argument
void coco_string_delete(char* &str);
|
|
|
|
|
|
//
|
|
// String conversions, byte character
|
|
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
//! Convert byte string to double
double coco_string_toDouble(const char* str);

//! Convert byte string to float
float coco_string_toFloat(const char* str);
|
|
|
|
// * * * * * * * * * End of Wide Character String Routines * * * * * * * * * //
|
|
|
|
|
|
//! Scanner Token
//
//! Plain record describing one token: its kind, where it was found in the
//! source text, its text and a link to the following token.
class Token
{
public:
    int kind;       //!< token kind
    int pos;        //!< token position in the source text (starting at 0)
    int col;        //!< token column (starting at 1)
    int line;       //!< token line (starting at 1)
    wchar_t* val;   //!< token value
    Token *next;    //!< Peek tokens are kept in linked list

    Token();        //!< Construct null
    ~Token();       //!< Destructor - cleanup allocated val??
};
|
|
|
|
|
|
//! Scanner Buffer
|
|
//
|
|
//! This Buffer supports the following cases:
|
|
//! -# seekable stream (file)
|
|
//! -# whole stream in buffer
|
|
//! -# part of stream in buffer
|
|
//! -# non seekable stream (network, console)
|
|
class Buffer {
|
|
private:
|
|
unsigned char *buf; //!< input buffer
|
|
int bufCapacity; //!< capacity of buf
|
|
int bufLen; //!< length of buffer
|
|
int bufPos; //!< current position in buffer
|
|
int bufStart; //!< position of first byte in buffer relative to input stream
|
|
int fileLen; //!< length of input stream (may change if the stream is no file)
|
|
FILE* cStream; //!< input stdio stream (normally seekable)
|
|
std::istream* stdStream; //!< STL std stream (seekable)
|
|
bool isUserStream_; //!< was the stream opened by the user?
|
|
|
|
int ReadNextStreamChunk();
|
|
bool CanSeek() const; //!< true if stream can be seeked otherwise false
|
|
|
|
protected:
|
|
Buffer(Buffer*); //!< for the UTF8Buffer
|
|
|
|
public:
|
|
static const int EoF = COCO_WCHAR_MAX + 1;
|
|
|
|
//! Attach buffer to a stdio stream.
|
|
//! User streams are not closed in the destructor
|
|
Buffer(FILE*, bool isUserStream = true);
|
|
|
|
//! Attach buffer to an STL std stream
|
|
//! User streams are not closed in the destructor
|
|
explicit Buffer(std::istream*, bool isUserStream = true);
|
|
|
|
//! Copy buffer contents from constant string
|
|
//! Handled internally as an istringstream
|
|
explicit Buffer(std::string&);
|
|
|
|
//! Copy buffer contents from constant character string
|
|
Buffer(const unsigned char* chars, int len);
|
|
//! Copy buffer contents from constant character string
|
|
Buffer(const char* chars, int len);
|
|
|
|
//! Close stream (but not user streams) and free buf (if any)
|
|
virtual ~Buffer();
|
|
|
|
virtual void Close(); //!< Close stream (but not user streams)
|
|
virtual int Read(); //!< Get character from stream or buffer
|
|
virtual int Peek(); //!< Peek character from stream or buffer
|
|
|
|
virtual int GetPos() const;
|
|
virtual void SetPos(int value);
|
|
};
|
|
|
|
|
|
//! A Scanner buffer that handles UTF-8 characters
|
|
class UTF8Buffer : public Buffer {
|
|
public:
|
|
UTF8Buffer(Buffer* b) : Buffer(b) {}
|
|
virtual int Read();
|
|
};
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
// StartStates
|
|
//------------------------------------------------------------------------------
|
|
//! maps characters (integers) to start states of tokens
//
//! Fixed-size table of tabSize buckets; each bucket is a singly linked chain
//! of (key, val) entries. The table owns every entry and releases them in the
//! destructor, so the class is deliberately not copyable.
class StartStates {
private:
    //! One (key, val) entry in a bucket chain
    class Elem {
    public:
        int key, val;
        Elem *next;
        Elem(int k, int v) :
            key(k), val(v), next(0)
        {}
    };

    static const int tabSize = 128;     //!< number of buckets

    Elem **tab;                         //!< bucket heads (tabSize entries)

    // Copying would leave two objects owning (and later deleting) the same
    // chains, so copy construction and assignment are declared private and
    // intentionally left undefined.
    StartStates(const StartStates&);
    StartStates& operator=(const StartStates&);

    //! Bucket index for key; going through unsigned keeps it in
    //! [0, tabSize) for negative keys as well
    static int bucket(int key) {
        return int(unsigned(key) % tabSize);
    }

public:
    //! Construct with every bucket empty
    StartStates() :
        tab(new Elem*[tabSize]())       // () value-initialises all heads to null
    {}

    //! Delete every entry of every chain, then the table itself
    virtual ~StartStates() {
        for (int i = 0; i < tabSize; ++i) {
            Elem *e = tab[i];
            while (e) {
                Elem *following = e->next;  // read the link before freeing e
                delete e;
                e = following;
            }
        }
        delete [] tab;
    }

    //! Associate val with key.
    //! The entry is pushed at the front of its chain, so a later set() for
    //! the same key takes precedence over an earlier one.
    void set(int key, int val) {
        Elem *e = new Elem(key, val);
        const int k = bucket(key);
        e->next = tab[k];
        tab[k] = e;
    }

    //! Return the value stored for key, or 0 if key has no entry
    int state(int key) const {
        const Elem *e = tab[bucket(key)];
        while (e && e->key != key) e = e->next;
        return e ? e->val : 0;
    }
};
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
// KeywordMap
|
|
//------------------------------------------------------------------------------
|
|
//! maps strings to integers (identifiers to keyword kinds)
|
|
class KeywordMap {
|
|
private:
|
|
class Elem {
|
|
public:
|
|
wchar_t *key;
|
|
int val;
|
|
Elem *next;
|
|
Elem(const wchar_t *k, int v) :
|
|
key(coco_string_create(k)), val(v), next(0)
|
|
{}
|
|
virtual ~Elem() {
|
|
coco_string_delete(key);
|
|
}
|
|
};
|
|
|
|
Elem **tab;
|
|
|
|
public:
|
|
KeywordMap() :
|
|
tab(new Elem*[128])
|
|
{
|
|
memset(tab, 0, 128 * sizeof(Elem*));
|
|
}
|
|
|
|
virtual ~KeywordMap() {
|
|
for (int i = 0; i < 128; ++i) {
|
|
Elem *e = tab[i];
|
|
while (e) {
|
|
Elem *next = e->next;
|
|
delete e;
|
|
e = next;
|
|
}
|
|
}
|
|
delete [] tab;
|
|
}
|
|
|
|
void set(const wchar_t *key, int val) {
|
|
Elem *e = new Elem(key, val);
|
|
const int k = coco_string_hash(key) % 128;
|
|
e->next = tab[k];
|
|
tab[k] = e;
|
|
}
|
|
|
|
int get(const wchar_t *key, int defaultVal) {
|
|
Elem *e = tab[coco_string_hash(key) % 128];
|
|
while (e && !coco_string_equal(e->key, key)) e = e->next;
|
|
return e ? e->val : defaultVal;
|
|
}
|
|
};
|
|
|
|
|
|
//! A Coco/R Scanner
|
|
class Scanner {
|
|
private:
|
|
static const int maxT = 13;
|
|
static const int noSym = 13;
|
|
|
|
static const int eofSym = 0; //!< end-of-file token id
|
|
static const char EOL = '\n'; //!< end-of-line character
|
|
|
|
void *firstHeap; //!< the start of the heap management
|
|
void *heap; //!< the currently active block
|
|
void *heapTop; //!< the top of the heap
|
|
void **heapEnd; //!< the end of the last heap block
|
|
|
|
StartStates start; //!< A map of start states for particular characters
|
|
KeywordMap keywords; //!< A hash of keyword literals to token kind
|
|
|
|
Token *t; //!< current token
|
|
wchar_t *tval; //!< text of current token
|
|
int tvalLength; //!< maximum capacity (length) for tval
|
|
int tlen; //!< length of tval
|
|
|
|
Token *tokens; //!< list of tokens already peeked (first token is a dummy)
|
|
Token *pt; //!< current peek token
|
|
|
|
int ch; //!< current input character
|
|
|
|
int pos; //!< byte position of current character
|
|
int line; //!< line number of current character
|
|
int col; //!< column number of current character
|
|
int oldEols; //!< the number of EOLs that appeared in a comment
|
|
|
|
void CreateHeapBlock(); //!< add a heap block, freeing unused ones
|
|
Token* CreateToken(); //!< fit token on the heap
|
|
void AppendVal(Token* tok); //!< adjust tok->val to point to the heap and copy tval into it
|
|
|
|
void Init(); //!< complete the initialization for the constructors
|
|
void NextCh(); //!< get the next input character into ch
|
|
void AddCh(); //!< append the character ch to tval
|
|
bool Comment0();
|
|
bool Comment1();
|
|
|
|
Token* NextToken(); //!< get the next token
|
|
|
|
public:
|
|
//! The scanner buffer
|
|
Buffer *buffer;
|
|
|
|
//! Using an existing open file handle for the scanner
|
|
Scanner(FILE*);
|
|
|
|
//! Using an existing open STL std stream
|
|
explicit Scanner(std::istream&);
|
|
|
|
//! Open a file for reading and attach scanner
|
|
explicit Scanner(const wchar_t* fileName);
|
|
|
|
//! Attach scanner to an existing character buffer
|
|
Scanner(const unsigned char* chars, int len);
|
|
//! Attach scanner to an existing character buffer
|
|
Scanner(const char* chars, int len);
|
|
|
|
~Scanner(); //!< free heap and allocated memory
|
|
Token* Scan(); //!< get the next token (possibly a token already seen during peeking)
|
|
Token* Peek(); //!< peek for the next token, ignore pragmas
|
|
void ResetPeek(); //!< ensure that peeking starts at the current scan position
|
|
|
|
}; // end Scanner
|
|
|
|
} // namespace
|
|
} // namespace
|
|
} // namespace
|
|
|
|
|
|
#endif // COCO_calcEntrySCANNER_H__
|
|
|