/* nl-string.c

    Copyright (C) 2016 Lutz Mueller

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "newlisp.h"

#ifdef SUPPORT_UTF8
#include <wctype.h>
#endif

#define PCRE_STATIC
#include "pcre.h"
#include "protos.h"

extern SYMBOL * sysSymbol[];
extern CELL * countCell;
extern int bigEndian;
extern int pagesize;

#define OVECCOUNT (MAX_REGEX_EXP * 3) /* max sub expressions in PCRE */

void regexError(char * msg_1, int num, const char * msg_2);
pcre * pcreCachedCompile(char * pattern, int options);
CELL * cellTokenString(char * *source, size_t * srclen, char *separator, pcre *re);

#ifdef USE_WIN_UTF16PATH
int open_utf16(const char* a, int b, int c);
#endif

/* ---------------------------- string processing ----------------------- */

/* Case-insensitive comparison of the first 'size' bytes of s1 and s2.
   Returns 0 when they are equal, else the difference of the first
   mismatching upper-cased bytes. NUL bytes are compared like any other
   byte, so the caller must guarantee both buffers hold 'size' bytes. */
int my_strnicmp(char * s1, char * s2, ssize_t size)
{
/* <ctype.h> functions are only defined for unsigned char values and EOF */
while(toupper((unsigned char)*s1) == toupper((unsigned char)*s2))
	{
	if(--size <= 0) return(0);
	s1++, s2++;
	}

return(toupper((unsigned char)*s1) - toupper((unsigned char)*s2));
}

/* Returns a new string cell holding 'len' bytes of 'string' (length 'slen')
   starting at 'offset'. A negative offset counts from the end, a negative
   len leaves that many bytes off at the end, and len == MAX_LONG means
   "to the end of the string". */
CELL * substring(char * string, ssize_t slen, ssize_t offset, ssize_t len)
{
if(offset < 0)
	{
	offset = slen + offset;
	if(offset < 0)
		return(errorProc(ERR_STRING_INDEX_INVALID));
	}
else
	offset = (offset > slen) ? slen : offset;

if(len < 0)
	{
	len = slen - offset + len;
	if(len < 0) len = 0;
	}

/* offset <= slen here; comparing against the remainder avoids the
   signed overflow of offset + len for very large len */
if(len == MAX_LONG || len > slen - offset)
	len = slen - offset;

return(stuffStringN(string + offset, len));
}

#define SEARCH_SIZE 0x1000

/* Searches the open file 'fileHandle' from its current position for a
   string, or for a regular expression when options are given. On success
   the file position is moved to the match (or just behind it when the flag
   argument is not nil) and that position is returned; otherwise nil. */
CELL * p_search(CELL * params)
{
UINT fileHandle;
ssize_t bytesRead;
ssize_t position;
int flag = 0;
CELL * next;
#ifdef LFS
off_t filePosition;
off_t foundPosition;
double result;
#else
ssize_t filePosition;
ssize_t foundPosition;
#endif
char * searchString;
char * buffer;
INT options;
size_t len;

params = getInteger(params, (UINT *)&fileHandle);
params = getStringSize(params, &searchString, &len, TRUE);
if(len == 0) return(nilCell);

options = -1;
if(params != nilCell)
	{
	next = params->next;
	params = evaluateExpression(params);
	if(isNumber(params->type) || params->type == CELL_STRING)
		/* 10.6.1 also accept string for options */
		parseRegexOptions(params, (UINT *)&options, FALSE);
	else
		{
		flag = (params->type != CELL_NIL);
		if(next != nilCell)
			/* 10.6.1 also accept string for options */
			parseRegexOptions(next, (UINT *)&options, TRUE);
		}
	}

/* query the position before allocating, so a failing lseek() does
   not leak the search buffer */
if((filePosition = lseek((int)fileHandle, 0, SEEK_CUR)) == -1)
	return(nilCell);

buffer = (char *)allocMemory(SEARCH_SIZE + 1);
foundPosition = 0;

do
	{
	memset(buffer, 0, SEARCH_SIZE + 1);
	bytesRead = read((int)fileHandle, buffer, SEARCH_SIZE);
	if(bytesRead < 0)
		{
		/* read error: -1 must not be used as a buffer length */
		position = -1;
		break;
		}

	if(options == -1)
		position = searchBuffer(buffer, bytesRead, searchString, len, 1);
	else
		position = searchBufferRegex(buffer, 0, searchString, bytesRead , options, &len);

	if(position != -1)
		{
		if(flag)
			foundPosition = filePosition + position + len;
		else
			foundPosition = filePosition + position;
		break;
		}

	/* step back by len so a match straddling two reads is still seen */
	filePosition = filePosition + bytesRead - len;
	lseek((int)fileHandle, filePosition, SEEK_SET);
	} while (bytesRead == SEARCH_SIZE);

freeMemory(buffer);

if(position == -1) return(nilCell);

lseek((int)fileHandle, foundPosition, SEEK_SET);
#ifdef LFS
result = foundPosition;
return(stuffFloat(result));
#else
return(stuffInteger(foundPosition));
#endif
}

/* returns a new string cell holding the character at the index in params;
   records the indexed string in the globals stringCell / stringIndexPtr */
CELL * implicitIndexString(CELL * cell, CELL * params)
{
ssize_t index;
char * ptr;
#ifndef SUPPORT_UTF8
char str[2];
#else
ssize_t p;
#endif

ptr = (char*)cell->contents;
getInteger(params, (UINT *)&index);
stringCell = cell;

/* empty string: index 0 or -1 yields a copy of the empty string */
if(cell->aux == 1 && (index == 0 || index == -1))
	return(copyCell(cell));

#ifndef SUPPORT_UTF8
index = adjustNegativeIndex(index, cell->aux - 1);
str[0] = *(ptr + index);
str[1] = 0;
stringIndexPtr = ptr + index;
return(stuffStringN(str, 1));
#else
index = adjustNegativeIndex(index, utf8_wlen(ptr, ptr + cell->aux));
ptr = utf8_index(ptr, index);
p = utf8_1st_len(ptr);
if(ptr + p > (char *)cell->contents + cell->aux)
	return(errorProc(ERR_INVALID_UTF8));
stringIndexPtr = ptr;
return(stuffStringN(ptr, p));
#endif
}

/* only used from setf() for string insertion
   uses the global stringIndexPtr in insertPtr
   the insert point set by implicitIndexString()
   modifies cellStr with new and returns new
*/
CELL * setNthStr(CELL * cellStr, CELL * new, char * insertPtr)
{
char * newStr;
char * oldStr;
size_t newLen, oldLen, len, offset;
char * str;

getStringSize(new, &newStr, &newLen, FALSE);
oldStr = (char*)cellStr->contents;
oldLen = cellStr->aux - 1;

if(oldLen == 0) return(copyCell(cellStr));

/* len is the byte length of the character being replaced */
#ifdef SUPPORT_UTF8
len = utf8_1st_len(insertPtr);
#else
len = 1;
#endif

str = callocMemory(oldLen + newLen - len + 1);
offset = insertPtr - oldStr;

memcpy(str, oldStr, offset);
memcpy(str + offset, newStr, newLen);
memcpy(str + offset + newLen, oldStr + offset + len, oldLen - offset - len);

cellStr->contents = (UINT)str;
cellStr->aux = oldLen + newLen - len + 1;
freeMemory(oldStr);

return(new);
}

/* For a string argument returns the code of the character at an optional
   index; for a number returns a one-character string. The case labels are
   interleaved with the SUPPORT_UTF8 / NEWLISP64 conditionals, so statement
   order matters here. */
CELL * p_char(CELL * params)
{
char * string;
ssize_t offset;
size_t len;
int num;
CELL * datCell;
char buff[2];

if(params == nilCell)
	return(errorProc(ERR_MISSING_ARGUMENT));

params = getEvalDefault(params, &datCell);

switch(datCell->type)
	{
	case CELL_STRING:
		symbolCheck = NULL;
		string = (char *)datCell->contents;
#ifndef SUPPORT_UTF8
		len = (size_t)datCell->aux - 1;
		if(len == 0) return(nilCell);
#else
		len = utf8_wlen(string, string + datCell->aux);
		if(len == 0)
			{
			if(datCell->aux - 1 == 0)
				return(nilCell);
			else
				return(stuffInteger(0));
			}
#endif
		if(params != nilCell)
			{
			params = getInteger(params, (UINT*)&offset);
#ifdef SUPPORT_UTF8
			if(getFlag(params)) /* treat as offset into 8-bit array */
				{
				offset = adjustNegativeIndex(offset, (size_t)datCell->aux - 1);
				return(stuffInteger((UINT)*((unsigned char *)string + (UINT)offset)));
				}
#endif
			}
		else
			offset = 0;

		offset = adjustNegativeIndex(offset, (size_t)len);
#ifndef SUPPORT_UTF8
		return(stuffInteger((UINT)*((unsigned char *)string + (UINT)offset)));

	case CELL_LONG:
		buff[0] = (int)datCell->contents;
		break;
#ifndef NEWLISP64
	case CELL_INT64:
		num = *(INT64 *)&datCell->aux;
		buff[0] = num;
		break;
	case CELL_FLOAT:
		num = *(double*)&datCell->aux;
		buff[0] = num;
#else /* NEWLISP64 */
	case CELL_FLOAT:
		num = *(double*)&datCell->contents;
		buff[0] = num;
#endif /* NEWLISP64 */
		break;
#else /* SUPPORT_UTF8 */
		while(offset--)
			string += utf8_1st_len(string);
		utf8_wchar(string, &num);
		return(stuffInteger(num));

#ifndef NEWLISP64
	case CELL_FLOAT:
	case CELL_LONG:
	case CELL_INT64:
		string = allocMemory(UTF8_MAX_BYTES + 1);
		if(datCell->type == CELL_FLOAT)
			num = *(double*)&datCell->aux;
		else if(datCell->type == CELL_INT64)
			num = *(INT64 *)&datCell->aux;
		else
			num = datCell->contents;
#else /* NEWLISP64 */
	case CELL_FLOAT:
	case CELL_LONG:
		string = allocMemory(UTF8_MAX_BYTES + 1);
		if(datCell->type == CELL_FLOAT)
			num = *(double*)&datCell->contents;
		else
			num = (int)datCell->contents;
#endif /* NEWLISP 64 */
		len = wchar_utf8(num, string);
		datCell = stuffStringN(string, len);
		free(string);
		return(datCell);
#endif /* SUPPORT_UTF8 */

	default:
		buff[0] = 0;
	}

return(stuffStringN(buff, 1));
}

/* Splits a string into a list of pieces of 'len' characters (default 1);
   lists are handed to explodeList(). When the flag argument is set, a
   trailing piece shorter than 'len' is dropped. */
CELL * p_explode(CELL * params)
{
char * string;
CELL * cell;
CELL * result;
ssize_t size;
ssize_t len = 1;
int flag = 0;
#ifdef SUPPORT_UTF8
int clen;
char * ptr;
#endif

params = getEvalDefault(params, &cell);

if(isList(cell->type))
	return(explodeList((CELL*)cell->contents, params));

if(cell->type == CELL_STRING)
	{
	string = (char *)cell->contents;
	size = cell->aux - 1;
	}
else
	return(errorProcExt(ERR_LIST_OR_STRING_EXPECTED, cell));

if(params != nilCell)
	{
	params = getInteger(params, (UINT *)&len);
	flag = getFlag(params);
	if(!flag && len > size) len = size;
	}

result = cell = getCell(CELL_EXPRESSION);

if(size == 0 || len <= 0) return(result);

#ifndef SUPPORT_UTF8
if(flag && size < len) return(result);

cell->contents = (UINT)stuffStringN(string, len);
cell = (CELL *)cell->contents;
string += len;

while((size -= len) > 0)
	{
	if(flag && size < len) break;
	cell->next = stuffStringN(string, (size >= len) ? len : size);
	cell = cell->next;
	string += len;
	}
#else
/* from here on size counts characters, clen counts bytes */
size = utf8_wlen(string, string + size + 1);
ptr = utf8_index(string, len);
clen = ptr - string;

if(flag && size < len) return(result);

cell->contents = (UINT)stuffStringN(string, clen);
cell = (CELL *)cell->contents;
string += clen;

while((size -= len) > 0)
	{
	if(flag && size < len) break;
	ptr = utf8_index(string, len);
	clen = ptr - string;
	cell->next = stuffStringN(string, clen);
	cell = cell->next;
	string += clen;
	}
#endif

return(result);
}

#define STR_LOWER 0
#define STR_UPPER 1
#define STR_TITLE 2

/* Shared worker of upper-case, lower-case and title-case: returns a new
   string cell converted according to 'type' (STR_LOWER, STR_UPPER or
   STR_TITLE). For STR_TITLE the rest of the string is lower-cased only
   when the optional flag argument is set. */
CELL * strUpperLower(CELL * params, int type)
{
#ifndef SUPPORT_UTF8
char * string;
char * ptr;
#else
char * utf8str;
int * unicode;
int * ptr;
size_t size;
#endif
CELL * cell;
int option = FALSE;

#ifndef SUPPORT_UTF8
params = getString(params, &string);
option = getFlag(params);
cell = stuffString(string);
ptr = (void *)cell->contents;

/* toupper()/tolower() are only defined for unsigned char values */
if(type == STR_UPPER)
	while(*ptr) { *ptr = toupper((unsigned char)*ptr); ptr++; }
else
	{
	if(type == STR_TITLE)
		if(*ptr) { *ptr = toupper((unsigned char)*ptr); ptr++; }
	if(((type == STR_TITLE) && option) || (type == STR_LOWER))
		while(*ptr) { *ptr = tolower((unsigned char)*ptr); ptr++; }
	}
#else
params = getStringSize(params, &utf8str, &size, TRUE);
option = getFlag(params);
size = utf8_wlen(utf8str, utf8str + size + 1);
unicode = allocMemory((size + 1) * sizeof(int));
size = utf8_wstr(unicode, utf8str, size);
ptr = unicode;

/* Note that on many platforms towupper/towlower
   do not work correctly for non-ascii unicodes */
if(type == STR_UPPER)
	while(*ptr) { *ptr = towupper(*ptr); ptr++; }
else
	{
	if(type == STR_TITLE)
		if(*ptr) { *ptr = towupper(*ptr); ptr++; }
	if(((type == STR_TITLE) && option) || (type == STR_LOWER))
		while(*ptr) { *ptr = towlower(*ptr); ptr++; }
	}

utf8str = allocMemory(size * UTF8_MAX_BYTES + 1);
size = wstr_utf8(utf8str, unicode, size * UTF8_MAX_BYTES + 1);
utf8str = reallocMemory(utf8str, size + 1);
free(unicode);
return(makeStringCell(utf8str, size));
#endif

return(cell);
}

CELL * p_upper(CELL * params) {return(strUpperLower(params, STR_UPPER));}
CELL * p_lower(CELL * params) {return(strUpperLower(params, STR_LOWER));}
CELL * p_title(CELL * params) {return(strUpperLower(params, STR_TITLE));}

/* Scans 'fmt' for the next printf-style conversion. Stores the cell type
   the conversion expects in *type (0 when the string ends without another
   conversion) and returns a pointer just behind the conversion, or NULL
   when the specification is malformed or unsupported. */
char * getFormatType(char * fmt, int * type)
{
PARSE_FORMAT:
/* get % */
while(*fmt != '%' && *fmt != 0) fmt++;

if(*fmt == 0)
	{
	*type = 0;
	return(fmt);
	}

fmt++;

/* is it double %% for '%' */
if(*fmt == '%')
	{
	fmt++;
	goto PARSE_FORMAT;
	}

/* get optional decimal format spec */
/* this GNU extension does not work on FreeBSD, OpenBSD */
/* works on OSX 10.9, Linux and Unix with GNU libc or alike */
if(*fmt == '\'') fmt++;

/* get width spec */
/* force + before numbers */
if(*fmt == '+')
	fmt++;
else
	{
	/* left align numbers or strings */
	if(*fmt == '-')
		{
		fmt++;
		/* force + before numbers */
		if(*fmt == '+') fmt++;
		}
	}

while(isdigit((unsigned char)*fmt) && *fmt != 0) fmt++;
if(*fmt == 0) return(NULL);

/* its a float or string with dot precision*/
if(*fmt == '.')
	{
	fmt++;
	while(isdigit((unsigned char)*fmt) && *fmt != 0) fmt++;
	if(*fmt == 0) return(NULL);

	if(*fmt == 'f' || *fmt == 'g' || *fmt =='G' || *fmt == 'e' || *fmt == 'E')
		{
		*type = CELL_FLOAT;
		return(++fmt);
		}

	if(*fmt == 's')
		{
		*type = CELL_STRING;
		return(++fmt);
		}
	else
		return(NULL);
	}

/* its a float without dot */
if(*fmt == 'f' || *fmt == 'g' || *fmt =='G' || *fmt == 'e' || *fmt == 'E')
	{
	*type = CELL_FLOAT;
	return(++fmt);
	}

/* its an integer or character */
#ifdef TRU64
if(*fmt == 'd' || *fmt == 'i' || *fmt == 'u' || *fmt == 'x' || *fmt == 'X' || *fmt == 'c' || *fmt == 'o')
#else
if(*fmt == 'd' || *fmt == 'u' || *fmt == 'x' || *fmt == 'X' || *fmt == 'c' || *fmt == 'o')
#endif
	{
#ifdef TRU64
#ifndef NEWLISP64
	*type = CELL_INT64;
#else
	*type = CELL_LONG;
#endif
#else
	*type = CELL_LONG;
#endif
	return(++fmt);
	}

/* its a string */
if(*fmt == 's')
	{
	*type = CELL_STRING;
	return(++fmt);
	}

/* supporting ld, li, lu, lx, lX formats */
if(*fmt == 'l' &&
	(*(fmt + 1) == 'd' || *(fmt + 1) == 'i' || *(fmt + 1) == 'u' || *(fmt + 1) =='x' || *(fmt + 1) == 'X'))
	{
#ifndef NEWLISP64
	*type = CELL_INT64;
#else
	*type = CELL_LONG;
#endif
	return(fmt+2);
	}

/* the platform macro used everywhere else in this function is TRU64;
   the former TRUE64 spelling made this guard always true */
#ifndef TRU64
/* UNIX except TRU64 and WINDOWS MINGW supporting lld, llu, llx, llX formats */
if(*fmt == 'l' && *(fmt + 1) == 'l' &&
	(*(fmt + 2) == 'd' || *(fmt + 2) == 'u' || *(fmt + 2) =='x' || *(fmt + 2) == 'X'))
	{
#ifndef NEWLISP64
	*type = CELL_INT64;
#else
	*type = CELL_LONG;
#endif
	return(fmt+3);
	}
#endif

#ifdef WINDOWS
/* on MINGW also support MS conventions */
/* strncmp() stops at the terminator, memcmp() could read behind it */
if(strncmp(fmt, "I64", 3) == 0 &&
	(*(fmt + 3) == 'd' || *(fmt + 3) == 'u' || *(fmt + 3) =='x' || *(fmt + 3) == 'X'))
	{
#ifndef NEWLISP64
	*type = CELL_INT64;
#else
	*type = CELL_LONG;
#endif
	return(fmt+4);
	}
#endif

/* L and q seem not to be supported on most GCC although in the docs
if(*fmt == 'L' && (*(fmt + 1) == 'd' || *(fmt + 1) =='x' || *(fmt + 1) == 'X'))
	{
	*type = CELL_INT64;
	return(fmt+3);
	}
*/

/* its a wchar_t unicode string */
if(*fmt == 'l' && *(fmt + 1) == 's')
	{
	*type = CELL_STRING;
	return(fmt+2);
	}

return(NULL);
}

/* Formats the arguments (or the members of one list argument) with a
   printf-style format string and returns the result as a new string
   cell. Each conversion is printed on its own by temporarily terminating
   the format string behind it. */
CELL * p_format(CELL * params)
{
char * format;
char * fmt;
char * nextfmt;
CELL * cell;
STREAM fmtStream = {NULL, NULL, 0, 14, 0};
int fType;
double floatNum;
UINT intNum;
#ifndef NEWLISP64
INT64 bigNum;
#endif
int evalFlag = TRUE;
char saveChar;

params = getString(params, &format);
fmt = format;
fmtStream.buffer = fmtStream.ptr = callocMemory(15);

while(params->type != CELL_NIL)
	{
	/* printf("entry>%s<\n", fmt); */
	nextfmt = getFormatType(fmt, &fType);
	/* printf("exit>%s<\n", nextfmt); */
	if(nextfmt == NULL)
		{
		closeStrStream(&fmtStream);
		return(errorProcExt2(ERR_FORMAT_STRING, stuffString(format)));
		}

	if(evalFlag)
		cell = evaluateExpression(params);
	else
		cell = params;

	/* first data argument is a list: take the values from the list */
	if(cell->type == CELL_EXPRESSION && fmt == format)
		{
		params = (CELL *)cell->contents;
		evalFlag = FALSE;
		cell = params;
		}

	/* more arguments than conversions */
	if(fType == 0)
		{
		closeStrStream(&fmtStream);
		return(errorProcExt(ERR_NUM_ARGS, params));
		}

	saveChar = *nextfmt;
	*nextfmt = 0;

	if(fType == CELL_LONG)
		{
		if(isNumber(cell->type))
			cell = getIntegerExt(cell, &intNum, FALSE);
		else
			goto FORMAT_DATA_ERROR;
		varPrintf((UINT)&fmtStream, fmt, intNum);
		goto NEXT_FORMAT;
		}

#ifndef NEWLISP64
	if(fType == CELL_INT64)
		{
		if(isNumber(cell->type))
			cell = getInteger64Ext(cell, &bigNum, TRUE);
		else
			goto FORMAT_DATA_ERROR;
		varPrintf((UINT)&fmtStream, fmt, bigNum);
		goto NEXT_FORMAT;
		}
#endif

	if(fType == CELL_FLOAT)
		{
		if(cell->type == CELL_FLOAT)
#ifndef NEWLISP64
			floatNum = *(double *)&cell->aux;
		else if(cell->type == CELL_INT64)
			floatNum = *(INT64 *)&cell->aux;
#else
			floatNum = *(double *)&cell->contents;
#endif
		else if(cell->type == CELL_LONG)
			floatNum = (INT)cell->contents;
#ifdef BIGINT
		else if(cell->type == CELL_BIGINT)
			floatNum = bigintCellToFloat(cell);
#endif
		else
			goto FORMAT_DATA_ERROR;
		varPrintf((UINT)&fmtStream, fmt, floatNum);
		goto NEXT_FORMAT;
		}

	if(fType != cell->type)
		goto FORMAT_DATA_ERROR;

	/* printf("stream>%s< with >%s<\n", (char *)cell->contents, fmt); */
	varPrintf((UINT)&fmtStream, fmt, cell->contents);

NEXT_FORMAT:
	*nextfmt = saveChar;
	fmt = nextfmt;
	params = params->next;
	continue;

FORMAT_DATA_ERROR:
	*nextfmt = saveChar;
	closeStrStream(&fmtStream);
	return(errorProcExt(ERR_FORMAT_DATA_TYPE, params));
	}

/* more conversions than arguments; fType is preset because
   getFormatType() leaves it untouched when it returns NULL */
fType = 0;
if(getFormatType(fmt, &fType) == NULL || fType != 0)
	{
	closeStrStream(&fmtStream);
	/* return here: the stream is closed and must not be printed to */
	return(errorProcExt2(ERR_NUM_ARGS, stuffString(format)));
	}

varPrintf((UINT)&fmtStream, fmt);

cell = makeStringCell(fmtStream.buffer, fmtStream.position);
symbolCheck = NULL;

return(cell);
}

void openStrStream(STREAM *
stream, size_t buffSize, int reopenFlag) { if(stream->buffer != NULL && reopenFlag) { if(buffSize == stream->size) { memset(stream->buffer, 0, buffSize); stream->position = stream->handle = 0; stream->ptr = stream->buffer; return; } } freeMemory(stream->buffer); stream->buffer = stream->ptr = callocMemory(buffSize + 1); stream->size = buffSize; stream->position = stream->handle = 0; } void closeStrStream(STREAM * stream) { if(stream->buffer != NULL) freeMemory(stream->buffer); stream->buffer = stream->ptr = NULL; stream->size = stream->position = 0; if(stream->handle != 0) { close((int)stream->handle); stream->handle = 0; } } char * cellToString(CELL * cell, size_t * size, int quoteFlag) { static STREAM strStream = {NULL, NULL, 0, 0, 0}; if(cell->type == CELL_STRING) { *size = cell->aux -1; return((char *)cell->contents); } openStrStream(&strStream, MAX_STRING, TRUE); prettyPrintFlags |= PRETTYPRINT_STRING; printCell(cell , quoteFlag, (UINT)&strStream); prettyPrintFlags &= ~PRETTYPRINT_STRING; *size = strStream.position; return(strStream.buffer); } void writeStreamChar(STREAM * stream, char chr) { if(stream->position == stream->size) { stream->size += stream->size / 2; stream->buffer = reallocMemory(stream->buffer, stream->size + 1); memset(stream->buffer + stream->position, 0, stream->size - stream->position + 1); stream->ptr = stream->buffer + stream->position; } *(stream->ptr++) = chr; stream->position++; } void writeStreamStr(STREAM * stream, char * buff, size_t length) { size_t newPosition; if(length == 0) length = strlen(buff); newPosition = stream->position + length; if(newPosition >= stream->size) { while(newPosition >= stream->size) stream->size += stream->size / 2; stream->buffer = reallocMemory(stream->buffer, stream->size + 1); memset(stream->buffer + stream->position, 0, stream->size - stream->position + 1); stream->ptr = stream->buffer + stream->position; } memcpy(stream->ptr, buff, length); stream->ptr += length; stream->position = newPosition; } /* 
creates a memory buffer and reads byte into it until the [/text] is found */ #define LLEN 7 #define LIMIT "[/text]" char * readStreamText(STREAM * stream, int * size) { STREAM outStream = {NULL, NULL, 0, 0, 0}; ssize_t findPos = -1; size_t searchLen; char * result; openStrStream(&outStream, MAX_STRING, 0); while(findPos == -1) { if((searchLen = strlen(stream->ptr)) < LLEN) break; findPos = searchBuffer(stream->ptr, searchLen, LIMIT, LLEN, TRUE); if(findPos != -1) { if(findPos > 0) writeStreamStr(&outStream, stream->ptr, findPos); stream->ptr += findPos + LLEN; result = allocMemory(outStream.position + 1); memcpy(result, outStream.buffer, outStream.position); *size = outStream.position; *(result + outStream.position) = 0; closeStrStream(&outStream); return(result); } writeStreamStr(&outStream, stream->ptr, searchLen - LLEN); stream->position += (stream->ptr - stream->buffer); stream->position += searchLen - LLEN; if(stream->handle == 0) /* its not a file */ { stream->buffer = stream->ptr; break; } else { if(searchLen == LLEN) break; lseek(stream->handle, stream->position, SEEK_SET); memset(stream->buffer, 0, stream->size + 1); if(read(stream->handle, stream->buffer, stream->size) > 0) stream->ptr = stream->buffer; else { *stream->ptr = 0; break; } } } closeStrStream(&outStream); return(NULL); } /* this is only used for reading with getToken() stream->size does not reflect the real size of the buffer as in makeStreamFromFile() */ int makeStreamFromString(STREAM * stream, char * str) { int len = strlen(str); stream->handle = stream->position = 0; stream->buffer = stream->ptr = str; /* make getToken work to the end of str */ stream->size = len + 4 * MAX_STRING; return(len); } int makeStreamFromFile(STREAM * stream, char * fileName, size_t size, size_t offset) { int result; if((stream->handle = openFile(fileName, "r", NULL)) == -1) return(0); stream->ptr = stream->buffer = (char *)callocMemory(size + 1); if(offset != 0) lseek((int)stream->handle, offset, SEEK_SET); 
stream->position = offset; stream->size = size; /* load first buffer */ if((result = read(stream->handle, stream->buffer, size)) < 0) { closeStrStream(stream); return(0); } return(TRUE); } ssize_t searchBuffer(char * buffer, size_t length, char * string, size_t size, int caseFlag) { size_t position = 0; if(caseFlag == FALSE) { while(position < length) { if(toupper(*buffer) == toupper(*string)) if(my_strnicmp(buffer, string, size) == 0) break; position++; buffer++; } } else /* case sensitive */ { while(position < length) { if(*buffer == *string) if(memcmp(buffer, string, size) == 0) break; position++; buffer++; } } if(position == length) return(-1); return(position); } /* eliminated in v.8.4.1 CELL * patternMatchS(char * pattern, char * string) { CELL * cell; CELL * match; CELL * star, * plus; char * start; int starLen, len; start = string; starLen = len = 0; MATCH: switch(*pattern) { case '\\': ++pattern; if(*pattern != *string) return(nilCell); break; case 0: if(*string != 0) return(nilCell); return(stuffStringN(start, len)); case '?': if(*string == 0) return(nilCell); break; case '#': if(*string == 0) return(nilCell); if(!isDigit((unsigned char)*string)) return(nilCell); break; case '+': if(*(pattern + 1) == 0) { if(*string == 0) plus = stuffString(""); else if(*(string + 1) == 0) plus = stuffStringN(string, 1); else return(nilCell); if(len == 0) return(plus); cell = stuffStringN(start, len); cell->next = plus; return(cell); } if((match = patternMatchS(pattern+1, string)) != nilCell) plus = stuffString(""); else if((match = patternMatchS(pattern+1, string+1)) != nilCell) plus = stuffStringN(string, 1); else return(nilCell); if(len == 0) cell = plus; else { cell = stuffStringN(start, len); cell->next = plus; } plus->next = match; return(cell); case '*': if(*(pattern + 1) == 0) { if(len == 0) return(stuffString(string)); cell = stuffStringN(start, len); cell->next = stuffString(string); return(cell); } if((match = patternMatchS(pattern+1, string)) != nilCell) { 
star = stuffStringN(start+len, starLen); if(len == 0) cell = star; else { cell = stuffStringN(start, len); cell->next = star; } star->next = match; return(cell); } if(*string != 0) ++string, ++starLen; else return(nilCell); goto MATCH; default: if(*pattern != *string) return(nilCell); break; } ++pattern; ++string; ++len; goto MATCH; } */ CELL * p_integer(CELL * params) { char * intString; INT64 num; INT base; CELL * deflt, * cell; INT64 result; deflt = getEvalDefault(params, &cell); symbolCheck = NULL; if(cell->type == CELL_STRING) intString = (char *)cell->contents; else if(isNumber(cell->type)) { #ifdef BIGINT if(cell->type == CELL_BIGINT) num = bigintToInt64(cell); else #endif getInteger64Ext(cell, &num, FALSE); return(stuffInteger64(num)); } else return(copyCell(evaluateExpression(deflt))); while(isspace((int)*intString)) intString++; if(deflt->next != nilCell) getInteger(deflt->next, (UINT *)&base); else base = 0; if(base == 16) { if(!isxdigit((unsigned char)*intString)) { if(*intString != '-' && *intString != '+') goto INT_DEFAULT; if(!isxdigit((unsigned char)*(intString+1))) goto INT_DEFAULT; } } else if(!isDigit((unsigned char)*intString)) { if(*intString != '-' && *intString != '+') goto INT_DEFAULT; if(!isDigit((unsigned char)*(intString+1))) goto INT_DEFAULT; } else if(*intString == '0' && (*(intString + 1) == 'b' || *(intString + 1) == 'B')) { intString += 2; base = 2; } #ifdef TRU64 result = strtoul(intString, NULL, base); #else result = strtoull(intString,(char **)0, base); #endif return(stuffInteger64(result)); INT_DEFAULT: return(copyCell(evaluateExpression(deflt))); } CELL * p_float(CELL * params) { char * fltString; double value; CELL * deflt; CELL * cell; deflt = getEvalDefault(params, &cell); symbolCheck = NULL; if(cell->type == CELL_STRING) fltString = (char *)cell->contents; else if(isNumber(cell->type)) { #ifdef BIGINT if(cell->type == CELL_BIGINT) value = bigintCellToFloat(cell); else #endif getFloat(cell, &value); return(stuffFloat(value)); 
} else return(copyCell(evaluateExpression(deflt))); while(isspace((int)*fltString)) fltString++; if(!isDigit((unsigned char)*fltString)) { if(*fltString != '-' && *fltString != '+' && *fltString != lc_decimal_point) return(copyCell(evaluateExpression(deflt))); if(!isDigit((unsigned char)*(fltString+1))) return(copyCell(evaluateExpression(deflt))); } value = atof(fltString); return( stuffFloat(value) ); } CELL * p_symbol(CELL * params) { char * token; char number[32]; SYMBOL * context; SYMBOL * sPtr; CELL * cell; cell = evaluateExpression(params); switch(cell->type) { case CELL_LONG: snprintf(number, 30, "%"PRIdPTR, cell->contents); token = number; break; #ifndef NEWLISP64 case CELL_INT64: token = number; snprintf(number, 30, "%"PRId64, *(INT64 *)&cell->aux); break; #endif /* NEWLISP64 */ case CELL_FLOAT: #ifndef NEWLISP64 snprintf(number, 30, "%1.10g",*(double *)&cell->aux); #else snprintf(number, 30, "%1.10g",*(double *)&cell->contents); #endif token = number; break; case CELL_STRING: token = (char*)cell->contents; if((cell->aux - 1) > MAX_SYMBOL) errorProcExt(ERR_STRING_TOO_LONG, cell); break; case CELL_SYMBOL: sPtr = (SYMBOL*)cell->contents; token = sPtr->name; break; default: return(errorProcExt(ERR_NUMBER_OR_STRING_EXPECTED, params)); } if((params = params->next) == nilCell) context = currentContext; else if((context = getCreateContext(params, TRUE)) == NULL) return(errorProcExt(ERR_SYMBOL_OR_CONTEXT_EXPECTED, params)); if(params->next != nilCell) { cell = evaluateExpression(params->next); if(isNil(cell)) { sPtr = lookupSymbol(token, context); if(sPtr == NULL) return(nilCell); return(stuffSymbol(sPtr)); } } sPtr = translateCreateSymbol(token, CELL_NIL, context, TRUE); return(stuffSymbol(sPtr)); } CELL * p_symbolSource(CELL * params) { STREAM strStream = {NULL, NULL, 0, 0, 0}; CELL * cell; openStrStream(&strStream, MAX_STRING, 0); serializeSymbols(params, (UINT)&strStream); cell = stuffString(strStream.buffer); closeStrStream(&strStream); return(cell); } CELL * 
p_string(CELL * params) { CELL * cell; STREAM strStream = {NULL, NULL, 0, 14, 0}; strStream.buffer = strStream.ptr = callocMemory(15); prettyPrintFlags |= PRETTYPRINT_STRING; while (params != nilCell) { cell = evaluateExpression(params); if(cell->type == CELL_STRING) /* speed optimization for strings */ writeStreamStr(&strStream, (char *)cell->contents, 0); else printCell(cell , FALSE, (UINT)&strStream); params = params->next; } prettyPrintFlags &= ~PRETTYPRINT_STRING; /* cell = stuffStringN(strStream.buffer, strStream.position); */ cell = makeStringCell(strStream.buffer, strStream.position); return(cell); } UINT getAddress(CELL * params) { UINT num; if(params == nilCell) { errorProc(ERR_MISSING_ARGUMENT); return(0); } getEvalDefault(params, ¶ms); #ifndef NEWLISP64 if(params->type == CELL_INT64) { num = *(INT64 *)¶ms->aux; return(num); } else if(params->type == CELL_FLOAT) { num = *(INT64 *)¶ms->aux; return(num); } #else if(params->type == CELL_FLOAT) { num = *(double *)¶ms->contents; return(num); } #endif return(params->contents); } CELL * p_getChar(CELL * params) { return(stuffInteger((UINT)*(char*)getAddress(params))); } CELL * p_getString(CELL * params) { UINT size, len; UINT address; UINT offset = 0; int inc = 1; char * limit; char zeros[4] = {0,0,0,0}; address = getAddress(params); if(address == 0) errorProc(ERR_CANNOT_CONVERT_NULL); /* (get-string
) ; get zero terminated ASCII or UTF-8 */ if(params->next == nilCell) return(stuffString((char *)address)); params = getInteger(params->next, &size); /* (get-string
) ; get no bytes binary data */ if(params == nilCell) return(stuffStringN((char *)address, size)); getStringSize(params, &limit, (size_t *)&len, TRUE); /* (get-string
) ; get until limit or bytes read */ if(len == 4 || len == 2) if(memcmp(limit, zeros, len) == 0) inc = len; while(offset < size) { if(offset < (size - len) && memcmp((char *)(address + offset), limit, len) == 0) break; offset += inc; } /* printf("size: %ld, len: %ld, inc: %d, offset: %ld\n", size, len, inc, offset); */ return(stuffStringN((char *)address, offset)); } CELL * p_getInteger(CELL * params) { return(stuffInteger(*(int *)getAddress(params))); } CELL * p_getLong(CELL * params) { return(stuffInteger64(*(INT64 *)getAddress(params))); } CELL * p_getFloat(CELL * params) { return(stuffFloat(*(double*)getAddress(params))); } CELL * p_address(CELL * params) { if(params == nilCell) return(errorProc(ERR_MISSING_ARGUMENT)); getEvalDefault(params, ¶ms); switch(params->type) { case CELL_LONG: return(stuffInteger((UINT)¶ms->contents)); #ifndef NEWLISP64 case CELL_INT64: case CELL_FLOAT: return(stuffInteger((UINT)¶ms->aux)); #else case CELL_FLOAT: return(stuffInteger((UINT)¶ms->contents)); #endif default: break; } return(stuffInteger(params->contents)); } CELL * p_copyMemory(CELL * params) { UINT toAddress, fromAddress, nBytes; CELL * cell; cell = evaluateExpression(params); params = params->next; fromAddress = getAddress(cell); cell = evaluateExpression(params); toAddress = getAddress(cell); getInteger(params->next, &nBytes); memcpy((char*)toAddress, (char*)fromAddress, nBytes); return(stuffInteger(nBytes)); } CELL * cellTokenString(char * * source, size_t * srclen, char * separator, pcre * re) { char * start; char * src; ssize_t len, slen; int ovector[OVECCOUNT]; int rc; if(*source == NULL) return(stuffString("")); start = src = *source; len = 0; slen = strlen(separator); if(re == NULL) { while(*src != 0) { if(*src == *separator) { if(strncmp(src, separator, slen) == 0) { *source = src+slen; if(**source == 0) /* last token is separator */ *source = NULL; return(stuffStringN(start, len)); } } src++; len++; } *source = src; if(len == 0) return(NULL); 
return(stuffStringN(start, len)); } else if(*src != 0) { len = *srclen; rc = pcre_exec(re, NULL, src, len, 0, 0, ovector, OVECCOUNT); /* matching failed */ if (rc == -1) { *source = src + len; return(stuffStringN(src, len)); } /* error in pcre_exec() */ if (rc < 0) regexError("error", rc, "when executing"); *source = src + ovector[1]; *srclen -= ovector[1]; if(**source == 0) /* last token is separator */ *source = NULL; if(ovector[1] - ovector[0] == 0) return(NULL); return(stuffStringN(src, ovector[0])); } return(NULL); } CELL * p_parse(CELL * params) { CELL * cell; CELL * newCell; CELL * result; char * string; char * separator; char token[MAX_STRING]; STREAM stream = {NULL, NULL, 0, 0, 0}; int tklen; size_t srclen; /* PCRE stuff */ INT options = 0; pcre *re = NULL; params = getStringSize(params, &string, &srclen, TRUE); if(params == nilCell) separator = NULL; else { params = getString(params, &separator); if(params != nilCell) { /* 10.6.1 also accept string for options */ parseRegexOptions(params, (UINT *)&options, TRUE); /* Compile the regular expression in separator */ re = pcreCachedCompile(separator, options); } else re = NULL; } if(separator == NULL) makeStreamFromString(&stream, string); result = cell = getCell(CELL_EXPRESSION); while(string != NULL) { if(separator != NULL) { if((newCell = cellTokenString(&string, &srclen, separator, re)) == NULL) break; } else { if(getToken(&stream, token, &tklen) == TKN_EMPTY) break; newCell = stuffString(token); } if(cell == result) result->contents = (UINT)newCell; else cell->next = newCell; cell = newCell; if(string == NULL) newCell->next = stuffString(""); } return(result); } #define PACK_NONE 0 #define PACK_CHAR 1 #define PACK_BYTE 2 #define PACK_INT 3 #define PACK_UNSIGNED_INT 4 #define PACK_LONG 5 #define PACK_UNSIGNED_LONG 6 #define PACK_LONG_LONG 7 #define PACK_UNSIGNED_LONG_LONG 8 #define PACK_FLOAT 9 #define PACK_DOUBLE 10 #define PACK_STRING 11 #define PACK_NULL 12 #define PACK_BIG_ENDIAN 13 #define 
PACK_LITTLE_ENDIAN 14 void swapEndian(char * data, int n) { char tmp[8]; int i; i = n; while(i) { tmp[n - i] = data[i - 1]; i--; } memcpy(data, &tmp[0], n); } CELL * p_pack(CELL * params) { char * format; char * source; char * packed; char * pPtr; CELL * cell; ssize_t length, ln; int len, type; int listFlag = 0; int endianSwitch = 0; char chrV; unsigned char byteV; short int shortV; unsigned short int uint16V; /* 16 bit */ unsigned int uint32V; /* 32 bit */ unsigned long long uint64V; /* 64 bit */ float floatV; double doubleV; #ifdef FFI cell = evaluateExpression(params); params = params->next; if(cell->type == CELL_IMPORT_FFI) return(packFFIstruct(cell, params)); /* nl-import.c */ if(cell->type != CELL_STRING) return(errorProc(ERR_INVALID_PARAMETER)); format = (char *)cell->contents; #else params = getString(params, &format); #endif source = format; length = 0; while((source = parsePackFormat(source, &len, &type)) != NULL) length += len; if(length == 0) return(stuffString("")); pPtr = packed = allocMemory(length); source = format; length = 0; while((source = parsePackFormat(source, &len, &type)) != NULL) { if(type == PACK_NULL) { memset(pPtr, 0, len); pPtr += len; length += len; continue; } else if(type == PACK_LITTLE_ENDIAN || type == PACK_BIG_ENDIAN) { endianSwitch = ((type == PACK_BIG_ENDIAN) != bigEndian); continue; } if(params->type == CELL_NIL) break; if(listFlag) cell = params; else cell = evaluateExpression(params); /* accept data in a list */ if(isList(cell->type)) { cell = (CELL *)cell->contents; params = cell; listFlag = 1; } #ifndef NEWLISP64 if(cell->type == CELL_FLOAT || cell->type == CELL_INT64) uint64V = *(INT64 *)&cell->aux; else /* CELL_LONG and CELL_STRING */ uint64V = cell->contents; #else uint64V = cell->contents; #endif if(type < PACK_FLOAT && cell->type == CELL_FLOAT) uint64V = *(double *)&cell->aux; switch(type) { case PACK_NONE: break; case PACK_BYTE: byteV = (char)uint64V; memcpy(pPtr, &byteV, 1); break; case PACK_CHAR: chrV = 
(char)uint64V; memcpy(pPtr, &chrV, 1); break; case PACK_INT: shortV = (short int)uint64V; memcpy(pPtr, &shortV, 2); if(endianSwitch) swapEndian(pPtr, 2); break; case PACK_UNSIGNED_INT: uint16V = (unsigned short int)uint64V; memcpy(pPtr, &uint16V, 2); if(endianSwitch) swapEndian(pPtr, 2); break; case PACK_LONG: case PACK_UNSIGNED_LONG: uint32V = (unsigned int)uint64V; memcpy(pPtr, &uint32V, 4); if(endianSwitch) swapEndian(pPtr, 4); break; case PACK_LONG_LONG: case PACK_UNSIGNED_LONG_LONG: memcpy(pPtr, &uint64V, 8); if(endianSwitch) swapEndian(pPtr, 8); break; case PACK_FLOAT: case PACK_DOUBLE: if(cell->type == CELL_FLOAT) doubleV = *(double *)&uint64V; else doubleV = (double)uint64V; if(type == PACK_FLOAT) { floatV = doubleV; memcpy(pPtr, &floatV, 4); if(endianSwitch) swapEndian(pPtr, 4); } else { memcpy(pPtr, &doubleV, 8); if(endianSwitch) swapEndian(pPtr, 8); } break; case PACK_STRING: if(cell->type == CELL_STRING) { ln = cell->aux - 1; if(len <= ln) memcpy(pPtr, (void *)cell->contents, len); else { memcpy(pPtr, (void*)cell->contents, ln); memset(pPtr + ln, 0, len - ln); } } else memset(pPtr, 0, len); break; default: break; } pPtr += len; params = params->next; length += len; } cell = stuffStringN(packed, length); free(packed); return(cell); } char * parsePackFormat(char * format, int * length, int * type) { *length = 0; while(*format == ' ') format++; if(*format == 0) return(NULL); switch(*format) { case '<': *type = PACK_LITTLE_ENDIAN; format++; break; case '>': *type = PACK_BIG_ENDIAN; format++; break; case 'b': *length = 1; *type = PACK_BYTE; format++; break; case 'c': *length = 1; *type = PACK_CHAR; format++; break; case 's': case 'n': *type = (*format == 's') ? PACK_STRING : PACK_NULL; format++; if(isdigit((int)*format) ) { *length = atol(format); while(isdigit((int)*format)) format++; } else *length = 1; break; case 'd': case 'u': *type = (*format == 'd') ? 
PACK_INT : PACK_UNSIGNED_INT; *length = 2; format++; break; case 'l': if(*(format + 1) == 'd' || *(format + 1) == 'u') { *length = 4; *type = (*(format + 1) == 'd') ? PACK_LONG : PACK_UNSIGNED_LONG; format += 2; } else if(*(format + 1) == 'f') { *length = 8; *type = PACK_DOUBLE; format += 2; } else { *type = PACK_NONE; format++; } break; case 'L': if(*(format + 1) == 'd' || *(format + 1) == 'u') { *length = 8; *type = (*(format + 1) == 'd') ? PACK_LONG_LONG : PACK_UNSIGNED_LONG_LONG; format += 2; } else { *type = PACK_NONE; format++; } break; case 'f': *length = 4; *type = PACK_FLOAT; format++; break; default: *type = PACK_NONE; format++; break; } return(format); } CELL * p_unpack(CELL * params) { CELL * cell; char * format; char * source; char * pPtr; CELL * result; CELL * next = NULL; size_t length, maxlen; int len; int type; int endianSwitch = 0; char chrV; unsigned char byteV; short shortV; unsigned short uint16V; int int32V; unsigned int uint32V; unsigned long long int uint64V; float floatV; double doubleV; cell = evaluateExpression(params->next); if(cell->type == CELL_STRING) { pPtr = (char *)cell->contents; maxlen = cell->aux - 1; } else { getIntegerExt(cell, (void*)&pPtr, FALSE); maxlen = MAX_LONG; } #ifdef FFI cell = evaluateExpression(params); if(cell->type == CELL_IMPORT_FFI) return(unpackFFIstruct(cell, pPtr)); /* nl-import.c */ if(cell->type != CELL_STRING) return(errorProc(ERR_INVALID_PARAMETER)); format = (char *)cell->contents; #else params = getString(params, &format); #endif length = 0; source = format; result = getCell(CELL_EXPRESSION); next = NULL; while( (source = parsePackFormat(source, &len, &type))!= NULL) { if(length + len > maxlen) break; if(type == PACK_LITTLE_ENDIAN || type == PACK_BIG_ENDIAN) { endianSwitch = ((type == PACK_BIG_ENDIAN) != bigEndian); continue; } switch(type) { case PACK_NULL: pPtr += len; length += len; continue; case PACK_BYTE: memcpy(&byteV, pPtr, 1); cell = makeCell(CELL_LONG, byteV); break; case PACK_CHAR: 
memcpy(&chrV, pPtr, 1); cell = makeCell(CELL_LONG, chrV); break; case PACK_INT: memcpy(&shortV, pPtr, 2); if(endianSwitch) swapEndian((char*)&shortV, 2); cell = makeCell(CELL_LONG, shortV); break; case PACK_UNSIGNED_INT: memcpy(&uint16V, pPtr, 2); if(endianSwitch) swapEndian((char*)&uint16V, 2); cell = makeCell(CELL_LONG, uint16V); break; case PACK_LONG: memcpy(&int32V, pPtr, 4); if(endianSwitch) swapEndian((char *)&int32V, 4); #ifndef NEWLISP64 cell = getCell(CELL_INT64); *(INT64 *)&cell->aux = int32V; #else cell = getCell(CELL_LONG); *(INT *)&cell->contents = int32V; #endif break; case PACK_UNSIGNED_LONG: memcpy(&uint32V, pPtr, 4); if(endianSwitch) swapEndian((char*)&uint32V, 4); #ifndef NEWLISP64 cell = getCell(CELL_INT64); *(INT64 *)&cell->aux = uint32V; #else cell = makeCell(CELL_LONG, uint32V); #endif break; case PACK_LONG_LONG: case PACK_UNSIGNED_LONG_LONG: memcpy(&uint64V, pPtr, 8); if(endianSwitch) swapEndian((char*)&uint64V, 8); #ifndef NEWLISP64 cell = getCell(CELL_INT64); memcpy(&cell->aux, &uint64V, 8); #else cell = makeCell(CELL_LONG, uint64V); #endif break; case PACK_FLOAT: memcpy(&floatV, pPtr, 4); if(endianSwitch) swapEndian((char*)&floatV, 4); doubleV = floatV; cell = stuffFloat(doubleV); break; case PACK_DOUBLE: memcpy(&doubleV, pPtr, 8); if(endianSwitch) swapEndian((char*)&doubleV, 8); cell = stuffFloat(doubleV); break; case PACK_STRING: if(pPtr == NULL) errorProc(ERR_CANNOT_CONVERT_NULL); cell = stuffStringN(pPtr, len); break; default: cell = getCell(CELL_NIL); break; } pPtr += len; length += len; if(next == NULL) result->contents = (UINT)cell; else next->next = cell; next = cell; } return(result); } CELL * p_trim(CELL * params) { char * str; char * ptr; size_t left = 0, right = 0, len; char * trimChr; int lchr, rchr; #ifdef SUPPORT_UTF8 int * wstr; int * wptr; CELL * result; #endif params = getStringSize(params, &str, &len, TRUE); if(params == nilCell) { ptr = str; while((unsigned char)*str <= 32 && *str != 0) str++, left++; if(left == len) 
return(stuffString("")); str = ptr + len -1; while((unsigned char)*str <= 32) str--, right++; return(stuffStringN(ptr + left, len - left - right)); } #ifndef SUPPORT_UTF8 ptr = str; #else len = utf8_wlen(str, str + len + 1); wptr = wstr = allocMemory((len + 1) * sizeof(int)); len = utf8_wstr(wstr, str, len); #endif if(len == 0) return(stuffString(str)); params = getString(params, &trimChr); #ifndef SUPPORT_UTF8 lchr = *trimChr; #else utf8_wchar(trimChr, &lchr); #endif if(params != nilCell) { getString(params, &trimChr); #ifndef SUPPORT_UTF8 rchr = *trimChr; #else utf8_wchar(trimChr, &rchr); #endif } else rchr = lchr; #ifndef SUPPORT_UTF8 while(*str == lchr) str++, left++; #else while(*wstr == lchr) wstr++, left++; #endif if(left == len) return(stuffString("")); #ifndef SUPPORT_UTF8 str = ptr + len - 1; while(*str == rchr && right < len) str--, ++right; return(stuffStringN(ptr + left, len - left - right)); #else wstr = wptr + len - 1; while(*wstr == rchr && right < len) wstr--, ++right; *(wptr + len - right) = 0; str = allocMemory((len - left - right + 1) * UTF8_MAX_BYTES); len = wstr_utf8(str, wptr + left, (len - left - right) * UTF8_MAX_BYTES); free(wptr); result = stuffStringN(str, len); free(str); return(result); #endif } /* ------- PRCE Perl Compatible Regular Expressions for all platforms ------- */ /* see also http://www.pcre.org/ */ /* currently PCRE v.5.0 from 13-Sep-2004 -> pcre.h */ /* additional regex option */ #define REPLACE_ONCE 0x8000 #define PRECOMPILED 0x10000 void regexError(char * msg_1, int num, const char * msg_2); /* PCRE options from manual not allowed from newLISP regex options in pcre_compile() */ #define PCRE_NOT_COMPILE (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY) /* PCRE options from manual allowed for pcre_exec() */ #define PCRE_EXEC_OPTIONS (PCRE_ANCHORED | PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY) CELL * p_regex(CELL * params) { pcre *re; int ovector[OVECCOUNT]; int rc, idx; char * pattern; char * subject; #ifdef SUPPORT_UTF8 char * 
string; #endif INT options = 0; UINT offset = 0; int len; size_t size; CELL * cell, * result, * strCell; params = getString(params, &pattern); params = getStringSize(params, &subject, &size, TRUE); if(params != nilCell) { /* 10.6.1 also accept string for options */ params = parseRegexOptions(params, (UINT *)&options, TRUE); if(params != nilCell) getInteger(params, &offset); } /* Compile the regular expression in the first argument */ re = pcreCachedCompile(pattern, (int)options); /* Compilation succeeded: match the subject in the second argument */ rc = pcre_exec( re, /* the compiled pattern */ NULL, /* no extra data - we didn't study the pattern */ subject, /* the subject string */ size, /* the length of the subject */ (int)offset, /* start at offset 0 in the subject */ (int)options & PCRE_EXEC_OPTIONS, ovector, /* output vector for substring information */ OVECCOUNT); /* number of elements in the output vector */ /* Matching failed */ if (rc == -1) return(nilCell); /* error in pcre_exec() */ if (rc < 0) regexError("error", rc, "when executing"); /* Match succeded */ /* Show substrings stored in the output vector */ result = cell = getCell(CELL_EXPRESSION); for(idx = 0; idx < rc; idx++) { len = ovector[2*idx+1] - ovector[2*idx]; strCell = stuffStringN(subject + ovector[2*idx], len); deleteList((CELL*)sysSymbol[idx]->contents); sysSymbol[idx]->contents = (UINT)copyCell(strCell); if(idx == 0) { cell->contents = (UINT)strCell; cell = (CELL *)cell->contents; } else { cell->next = strCell; cell = cell->next; } #ifdef SUPPORT_UTF8 if(2048 & options) cell->next = stuffInteger(utf8_wlen(subject, subject + ovector[2*idx] + 1)); else #endif cell->next = stuffInteger(ovector[2*idx]); cell = cell->next; #ifdef SUPPORT_UTF8 if(2048 & options) { string = (char *)strCell->contents; cell->next = stuffInteger(utf8_wlen(string, string + len + 1)); } else #endif cell->next = stuffInteger(len); cell = cell->next; } return(result); } CELL * p_regexComp(CELL * params) { char * pattern; 
INT options = 0; pcre * re; int rc; size_t size; CELL * result; params = getString(params, &pattern); if(params != nilCell) /* 10.6.1 also accept string for options */ params = parseRegexOptions(params, (UINT *)&options, TRUE); /* Compile the regular expression in the first argument */ re = pcreCachedCompile(pattern, (int)options); rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size); if(rc < 0) return(nilCell); result = stuffStringN((char *)re, size); return(result); } ssize_t searchBufferRegex(char * string, size_t offset, char * pattern, size_t length, int options, size_t * len) { pcre *re; int ovector[OVECCOUNT]; int rc, idx; CELL * cell; options &= ~REPLACE_ONCE ; /* turn custom bits off for PCRE */ /* Compile the regular expression in the first argument */ re = pcreCachedCompile(pattern, options); /* Compilation succeeded: match the subject in the second argument */ rc = pcre_exec(re, NULL, string, length, offset, options & PCRE_EXEC_OPTIONS, ovector, OVECCOUNT); /* matching failed */ if (rc == -1) return(-1); /* error in pcre_exec() */ if (rc < 0) regexError("error", rc, "when executing"); for(idx = 0; idx < rc; idx++) { cell = stuffStringN(string + ovector[2*idx], ovector[2*idx+1] - ovector[2*idx]); deleteList((CELL*)sysSymbol[idx]->contents); sysSymbol[idx]->contents = (UINT)cell; } if(len != NULL) *len = ovector[1] - ovector[0]; return((UINT)ovector[0]); } pcre * pcreCachedCompile(char * pattern, int options) { const char * error; int errOffset; static char * cPattern = NULL; static pcre * re = NULL; static int cacheOptions = -1; UINT len; if(options & 0x10000) return((pcre *)pattern); if((cPattern == NULL) || (strcmp(cPattern, pattern) != 0) || (options != cacheOptions)) { cacheOptions = options; if(cPattern != NULL) freeMemory(cPattern); len = strlen(pattern); cPattern = (char *)allocMemory(len + 1); memcpy(cPattern, pattern, len + 1); if(re != NULL) (pcre_free)(re); options = options & ~PCRE_NOT_COMPILE; re = pcre_compile(pattern, options, &error, 
&errOffset, NULL); /* Compilation failed: print the error message and exit */ if (re == NULL) { freeMemory(cPattern); cPattern = NULL; regexError("offset", errOffset, error); } } return(re); } void regexError(char * msg_1, int number, const char * msg_2) { CELL * cell; char * errorBuff = malloc(256); snprintf(errorBuff, 256, "%s %d %s", msg_1, number, msg_2); cell = stuffString(errorBuff); free(errorBuff); fatalError(ERR_REGEX, cell, 0); } /* replace string with or without (options = -1) regular expressions */ typedef struct { int offset; int length; char * repStr; int repLen; void * next; } REGEX; void freeRegex(REGEX * regex); char * replaceString (char * keyStr, size_t keyLen, char * buff, size_t buffLen, CELL * exprCell, UINT * cnt, int options, size_t * newLen) { ssize_t oldLen, findPos; size_t count, offset; size_t repLen; char * newBuff; CELL * cell; REGEX * start_rx = NULL, * end_rx = NULL; UINT * resultStackIdxSave; int errNo; int bias = 0; *newLen = oldLen = 0; *cnt = 0; offset = count = 0; /* save all found string and fill sys variables $0, $1 ... 
etc */ while(offset <= buffLen) { if(options == -1) findPos = searchBuffer(buff + offset, buffLen - offset, keyStr, keyLen, TRUE); else { findPos = searchBufferRegex(buff, offset, keyStr, buffLen, options, &keyLen); itSymbol->contents = sysSymbol[0]->contents; } if(findPos == -1) break; if(options != -1) findPos -= offset; if(count == 0) start_rx = end_rx = (REGEX *)callocMemory(sizeof(REGEX)); else { end_rx->next = callocMemory(sizeof(REGEX)); end_rx = end_rx->next; } end_rx->next = NULL; end_rx->offset = findPos + offset; /* pos where pattern found */ resultStackIdxSave = resultStackIdx; countCell->contents++; if((cell = evaluateExpressionSafe(exprCell, &errNo)) == NULL) { freeRegex(start_rx); longjmp(errorJump, errNo); } end_rx->length = keyLen; /* length of pattern found */ if(cell->type == CELL_STRING) { repLen = end_rx->repLen = cell->aux-1; end_rx->repStr = (char *)allocMemory(repLen + 1); /* replacement string */ memcpy(end_rx->repStr, (char *)cell->contents, cell->aux); } else /* if replacement expression is not string leave old content */ { repLen = end_rx->repLen = keyLen; end_rx->repStr = (char *)callocMemory(repLen + 1); memcpy(end_rx->repStr, buff + offset + findPos, keyLen); } cleanupResults(resultStackIdxSave); count++; offset += findPos + keyLen; /* next start for search */ oldLen += keyLen; /* space occupied by old content */ *newLen += repLen; /* space needed by new replacement content */ bias = (keyLen == 0); offset += bias; if(options != -1) if(options & REPLACE_ONCE) break; } if(count == 0) return(NULL); *newLen = buffLen - oldLen + *newLen; newBuff = callocMemory(*newLen + 1); end_rx = start_rx; /* count is now offset into the new buffer */ *cnt = count; count = start_rx->offset; memcpy(newBuff, buff, count); while(start_rx != NULL) /* replace */ { repLen = start_rx->repLen; memcpy(newBuff + count, start_rx->repStr, repLen); count += repLen; freeMemory(start_rx->repStr); end_rx = start_rx->next; if(end_rx != NULL) /* copy from buffer */ { 
repLen = end_rx->offset - start_rx->offset - start_rx->length; memcpy(newBuff + count, buff + start_rx->offset + start_rx->length, repLen); count += repLen; } freeMemory(start_rx); start_rx = end_rx; } /* printf("count %d buffLen %d offset %d bias %d str %s lencpy %d\n", count, buffLen, offset, bias, buff + offset - bias, buffLen - offset + bias); */ memcpy(newBuff + count, buff + offset - bias, buffLen - offset + bias); itSymbol->contents = (UINT)nilCell; return(newBuff); } void freeRegex(REGEX * regex) { REGEX * oldRegex; while(regex != NULL) { if(regex->repStr != NULL) freeMemory(regex->repStr); oldRegex = regex; regex = regex->next; free(oldRegex); } } /* support strings in the integer-options field of: directory, ends-with, find, find-all, parse, regex, replace, search, starts-with instead of: (find "xyz" buff (| 1 4 2048)) example: (find "x.z" "abcX\nZ" "is") => 3 for PCRE_CASELESS, PCRE_DOTALL */ CELL* parseRegexOptions(CELL* params, UINT * options, int evalFlag) { CELL* cell; int i; char* buffer; if (evalFlag) cell = evaluateExpression(params); else cell = params; *options = 0; if (cell->type == CELL_STRING) { buffer = (char*)cell->contents; for (i = 0; i < cell->aux - 1; i ++) { switch (buffer[i]) { case 'i': *options |= 1; break; /* PCRE_CASELESS */ case 'm': *options |= 2; break; /* PCRE_MULTILINE */ case 's': *options |= 4; break; /* PCRE_DOTALL */ case 'x': *options |= 8; break; /* PCRE_EXTENDED */ case 'A': *options |= 16; break; /* PCRE_ANCHORED */ case 'D': *options |= 32; break; /* PCRE_DOLLAR_ENDONLY */ case 'U': *options |= 512; break; /* PCRE_UNGREEDY */ case 'u': *options |= 2048; break; /* PCRE_UTF8 */ default: return(errorProcExt(ERR_INVALID_OPTION, cell)); } } } else { getIntegerExt(cell, options, FALSE); } return (params->next); } /* eof */