// Listing metadata: 143 lines, 4.9 KiB, C++
#include "escape.h"

#include <stdexcept>
#include <string>

using namespace ts;
using namespace std;

/**
 * Escapes a string by replacing every reserved character with a
 * two-character sequence introduced by a backslash.
 *
 * Mapping (input character on the left, emitted sequence on the right):
 *   backslash        becomes  "\\" (two backslashes)
 *   space            becomes  "\s"
 *   '/'              becomes  "\/"
 *   '|'              becomes  "\p"
 *   BEL  (0x07)      becomes  "\a"
 *   BS   (0x08)      becomes  "\b"
 *   FF   (0x0C)      becomes  "\f"
 *   LF   (0x0A)      becomes  "\n"
 *   CR   (0x0D)      becomes  "\r"
 *   TAB  (0x09)      becomes  "\t"
 *   VT   (0x0B)      becomes  "\v"
 *
 * Every other byte (including NUL and non-ASCII bytes) is copied unchanged.
 * NOTE(review): this code set looks like the TeamSpeak ServerQuery escape
 * table -- confirm against the protocol documentation.
 *
 * @param in  The raw text to escape.
 * @return    The escaped text; query::unescape reverses the mapping.
 */
std::string query::escape(std::string in) {
    std::string out;
    // Most inputs contain few reserved characters; a little headroom avoids
    // the first reallocation without over-committing memory.
    out.reserve(in.length() + in.length() / 8 + 2);

    for (const char ch : in) {
        char code = '\0';  // second character of the escape sequence, '\0' = none
        switch (ch) {
            case '\\':   code = '\\'; break;
            case ' ':    code = 's';  break;
            case '/':    code = '/';  break;
            case '|':    code = 'p';  break;
            case '\b':   code = 'b';  break;
            case '\f':   code = 'f';  break;
            case '\n':   code = 'n';  break;
            case '\r':   code = 'r';  break;
            case '\t':   code = 't';  break;
            case '\x07': code = 'a';  break;
            case '\x0B': code = 'v';  break;
            default:     break;
        }

        if (code != '\0') {
            out += '\\';
            out += code;
        } else {
            out += ch;
        }
    }

    return out;
}

/*
 * Reference implementation (Pascal) of the CESU-8 surrogate pair to UTF-8
 * conversion that query::unescape performs:
 *
 *   if (input[i] = $ED) and            // check for a high and a low surrogate
 *      (input[i+3] = $ED) and
 *      (input[i+1] or $F = $AF) and
 *      (input[i+4] or $F = $BF) then
 *   begin
 *     output := output +
 *       AnsiChar((input[i+1] and $F + 1) shr 2 and 7 or $F0) +    // the "and 7" may also be omitted
 *       AnsiChar((input[i+2] shr 2 and $F + (input[i+1] and $F + 1) shl 4 and $30) or $80) +
 *       AnsiChar((input[i+4] and $F + input[i+2] shl 4 and $30) or $80) +
 *       AnsiChar(input[i+5]);
 *   end
 *   else
 *     output := output + AnsiChar(input);
 *
 * Note on Pascal precedence: "and", "shr" and "shl" bind tighter than "+"
 * and "or", so "x and $F + 1" means "(x and $F) + 1".
 */

namespace {

/** Returns true when `byte` has the bit pattern 10xxxxxx (UTF-8 continuation byte). */
inline bool is_utf8_continuation(unsigned char byte) {
    return (byte & 0xC0u) == 0x80u;
}

/**
 * Maps the character that follows a backslash to the character it stands for.
 * Returns '\0' when `code` is not a known escape code.
 */
inline char decode_escape_code(char code) {
    switch (code) {
        case 's':  return ' ';
        case '/':  return '/';
        case 'p':  return '|';
        case 'b':  return '\b';
        case 'f':  return '\f';
        case 'n':  return '\n';
        case 'r':  return '\r';
        case 't':  return '\t';
        case 'a':  return '\x07';
        case 'v':  return '\x0B';
        case '\\': return '\\';
        default:   return '\0';
    }
}

} // namespace

/**
 * Reverses query::escape and sanitizes the result as UTF-8.
 *
 * The input is processed in a single left-to-right pass:
 *  - A backslash followed by a known escape code is replaced by the character
 *    it encodes. A backslash followed by an unknown code, or a lone backslash
 *    at the very end, is kept as-is.
 *  - A CESU-8 encoded surrogate pair (two 3-byte sequences, 6 bytes) is
 *    re-encoded as the equivalent 4-byte UTF-8 sequence.
 *  - Well-formed 2/3/4-byte UTF-8 sequences (lead byte followed by the right
 *    number of continuation bytes) are copied unchanged. Overlong forms and
 *    unpaired surrogates are not detected.
 *  - Lead bytes that can never occur in UTF-8 (0xC0, 0xC1, 0xF5..0xFF) and a
 *    multi-byte sequence truncated by the end of the input are dropped
 *    silently, regardless of `throw_error`.
 *  - Any other malformed sequence (stray continuation byte, or a lead byte
 *    whose followers are not continuation bytes) is an error: with
 *    `throw_error` an exception is raised, otherwise the whole expected
 *    sequence width (1..4 bytes) is removed from the output.
 *
 * @param in           The escaped text.
 * @param throw_error  Whether malformed UTF-8 raises instead of being dropped.
 * @return             The unescaped, sanitized text.
 * @throws std::invalid_argument  On malformed UTF-8 when `throw_error` is set.
 *                                The reported index is the byte offset in the
 *                                unescaped output at which the error occurred.
 */
std::string query::unescape(std::string in, bool throw_error) {
    using size_type = std::string::size_type;

    const size_type length = in.length();
    // Bytes are inspected as unsigned so comparisons against 0x80 and above
    // work regardless of whether plain char is signed.
    const auto byte_at = [&in](size_type at) {
        return static_cast<unsigned char>(in[at]);
    };

    std::string out;
    out.reserve(length);  // the output never grows beyond the input

    size_type pos = 0;
    while (pos < length) {
        const size_type remaining = length - pos;
        const unsigned char lead = byte_at(pos);

        if (lead == '\\') {
            if (remaining < 2) {
                out += '\\';  // lone trailing backslash is kept verbatim
                break;
            }
            const char decoded = decode_escape_code(in[pos + 1]);
            if (decoded != '\0') {
                out += decoded;
                pos += 2;
            } else {
                // Unknown code: keep the backslash; the following character is
                // handled by the next iteration like any other input byte.
                out += '\\';
                pos += 1;
            }
            continue;
        }

        if (lead < 0x80u) {  // plain ASCII
            out += in[pos];
            ++pos;
            continue;
        }

        // CESU-8 surrogate pair: ED A0..AF xx  ED B0..BF yy.
        // The trailing bytes must be continuation bytes, otherwise the
        // re-encoded sequence would itself be malformed.
        if (remaining >= 6 && lead == 0xEDu && byte_at(pos + 3) == 0xEDu &&
            (byte_at(pos + 1) & 0xF0u) == 0xA0u &&
            (byte_at(pos + 4) & 0xF0u) == 0xB0u &&
            is_utf8_continuation(byte_at(pos + 2)) &&
            is_utf8_continuation(byte_at(pos + 5))) {
            const unsigned high_low = byte_at(pos + 1) & 0x0Fu;  // wwww
            const unsigned high_mid = byte_at(pos + 2);          // 10xxxxxx
            const unsigned low_high = byte_at(pos + 4) & 0x0Fu;  // top 4 bits of low surrogate payload
            const unsigned plane = high_low + 1u;                // uuuuu = wwww + 1

            out += static_cast<char>(0xF0u | ((plane >> 2) & 0x07u));
            out += static_cast<char>(0x80u | ((plane << 4) & 0x30u) | ((high_mid >> 2) & 0x0Fu));
            out += static_cast<char>(0x80u | ((high_mid << 4) & 0x30u) | low_high);
            out += in[pos + 5];
            pos += 6;
            continue;
        }

        if (lead < 0xC0u) {
            // Continuation byte without a preceding lead byte.
            if (throw_error)
                throw std::invalid_argument("Invalid UTF-8 character at index " + std::to_string(out.size()));
            ++pos;
            continue;
        }

        if (lead <= 0xC1u || lead >= 0xF5u) {
            // Bytes that never appear in UTF-8; dropped silently even when
            // throw_error is set (long-standing behavior of this function).
            ++pos;
            continue;
        }

        size_type width = 4;  // 0xF0..0xF4
        if (lead <= 0xDFu)
            width = 2;        // 0xC2..0xDF
        else if (lead <= 0xEFu)
            width = 3;        // 0xE0..0xEF

        if (remaining < width)
            break;  // sequence cut off by the end of the input: drop the tail silently

        bool well_formed = true;
        for (size_type offset = 1; offset < width; ++offset) {
            if (!is_utf8_continuation(byte_at(pos + offset))) {
                well_formed = false;
                break;
            }
        }

        if (well_formed) {
            out.append(in, pos, width);
        } else if (throw_error) {
            throw std::invalid_argument("Invalid UTF-8 character at index " + std::to_string(out.size()));
        }
        // When not well formed (and not throwing) the whole expected width is
        // skipped, i.e. the lead byte and the bytes that should have followed it.
        pos += width;
    }

    return out;
}