Takes a string and unescapes backslash ("\") style escape codes.
Code | Replace |
\\ | \ |
\" | " |
\s | space |
\n | new line |
\t | tab (4 spaces) |
\uxxxx | xxxx is four hex digits giving the unicode character 0000xxxx |
\Uxxxxxxxx | xxxxxxxx is eight hex digits giving the unicode character xxxxxxxx |
\hXxxx... | X is a single hex digit; its value is the number of pairs of hex characters that follow it. Each pair is one raw byte of a UTF-8 string, so be sure the bytes form valid characters |
Three versions of the function are given: Unescape and UnescapeTight work entirely with string functions, while UnescapeSlow works with list and string functions.
The one that uses lists (UnescapeSlow) is slower.
string byte2hex(integer x)
{//Helper function for use with unicode characters.
    //Returns the two upper-case hex digits of the low 8 bits of x;
    //any higher bits of x are ignored.
    string digits = "0123456789ABCDEF";
    integer high = (x >> 4) & 0xF;//upper nibble
    integer low = x & 0xF;//lower nibble
    return llGetSubString(digits, high, high) + llGetSubString(digits, low, low);
}
string Unescape(string a)
{
    //Please visit the website for instructions about this function (and view its talk page for the author's notes)
    //http://lslwiki.com/lslwiki/wakka.php?wakka=FunctionUnescape
    //
    //Returns a copy of a with the backslash escape codes expanded:
    //  \\  \"  \s  \n  \t   -> backslash, quote, space, new line, tab (four spaces)
    //  \uXXXX  \UXXXXXXXX   -> the character whose code point is the hex value
    //  \hNxx...             -> N raw bytes written as hex pairs, decoded by llUnescapeURL
    //A backslash followed by any other character is left in place.
    //Several expressions assign inside a larger expression; keep the
    //statement order intact when editing.
    string b = a;   //not-yet-scanned tail of a (everything after index c)
    integer c = -1; //index in a of the backslash currently being handled
    integer d;      //index in b of the character after the backslash (0 = no backslash left)
    integer e;      //scratch: code index, then hex digit count or code point
    integer f = 0;  //scratch: length of b, hex digit count, or UTF-8 continuation byte count
    string g;       //percent-encoded bytes handed to llUnescapeURL
    while(d = -~llSubStringIndex(b, "\\"))
    {
        c += d;
        //e = position of the code letter in "uUhts\"\\n": u0 U1 h2 t3 s4 "5 \6 n7, or -1
        if(2 < e = llSubStringIndex("uUhts\"\\n",llGetSubString(b,d,d)))
        {
            //Simple one-for-one codes. The lookup string is laid out so that
            //indexes 0-3 are the tab (four spaces), 4 is the space, 5 the quote,
            //6 the backslash and 7 the new line; it must start with FIVE spaces.
            a = llInsertString(llDeleteSubString(a,c,-~c), c, llGetSubString("     \"\\\n",e * (e !=3), e));
        }
        else if(e==2)//rx[11,22,33,44,55,66,77,88,99,AA,BB,CC,DD,EE,FF]
        {
            g = "";
            //e = number of hex digits requested (two per byte)
            e = (integer)("0x"+llGetSubString(b,-~d,-~d)) << 1;
            if(d - ~e >= (f = llStringLength(b)))
            {
                //The string ends before the requested digits do: use only what
                //is available, rounded up to a whole pair. The "0" pads a
                //dangling single digit into a full pair.
                e = (f + ~d) & -2;
                b += "0";
            }
            if((f = e))//this may look like a mistake, it's not
            {
                //build "%xx%xx..." from the last pair back to the first
                do
                    g = "%"+llGetSubString(b,d + e,d - ~e) + g;
                while((e-=2) > 0);
            }
            a = llInsertString(llDeleteSubString(a,c, c + 2 + f),c, g = llUnescapeURL(g));
            c += ~-llStringLength(g);//add to c so we don't accidentally unescape result
            //NOTE(review): when g is empty and the escape sat at index 0, c
            //becomes -1 and the tail computed below is emptied - confirm intent.
        }
        else if(~e)// \uXXXX or \UXXXXXXXX
        {
            //e becomes 0 for \u and 4 for \U: the number of digits beyond the first four
            a = llDeleteSubString(a, c, c + 5 + (e = e << 2));
            if(0 < e = (integer)("0x"+llGetSubString(b,-~d, d +4 + e)))
            {
                //f = number of UTF-8 continuation bytes needed for code point e
                if (e >= 0x4000000)// || i < 0)//if only unicode supported negatives...
                    f = 5;
                else if (e >= 0x200000)
                    f = 4;
                else if (e >= 0x10000)
                    f = 3;
                else if (e >= 0x800)
                    f = 2;
                else
                    f = (e >= 0x80);
                //lead byte: length marker bits plus the top bits of e
                g = "%" + byte2hex((e >> (6 * f)) | ((0x7F80 >> f) << !f));
                //continuation bytes: 10xxxxxx, six bits each
                while(f)
                    g += "%" + byte2hex((((e >> (6 * (f=~-f))) | 0x80) & 0xBF));
                a = llInsertString(a, c, llUnescapeURL(g));
            }
            //NOTE(review): when nothing is inserted (value <= 0) the character
            //now at index c is skipped by the scan below - confirm intent.
        }
        //continue scanning after the character at index c
        b = llDeleteSubString(a,0,c);
    }
    return a;
}
string UnescapeTight(string a)
{//Requires less memory than the other two versions
    //Please visit the website for instructions about this function (and view its talk page for the author's notes)
    //http://lslwiki.com/lslwiki/wakka.php?wakka=FunctionUnescape
    //
    //Returns a copy of a with the backslash escape codes expanded:
    //  \\  \"  \s  \n  \t   -> backslash, quote, space, new line, tab (four spaces)
    //  \uXXXX  \UXXXXXXXX   -> the character whose code point is the hex value
    //  \hNxx...             -> N raw bytes written as hex pairs, decoded by llUnescapeURL
    //A backslash followed by any other character is left in place.
    //b doubles as the unscanned tail of a and as the scratch buffer for the
    //percent-encoded bytes, which is why it is rebuilt at the end of every pass.
    string b = a;   //not-yet-scanned tail of a (everything after index c)
    integer c = -1; //index in a of the backslash currently being handled
    integer f = 0;  //scratch: length of b, hex digit count, or UTF-8 continuation byte count
    @loop;
    integer d = -~llSubStringIndex(b, "\\");//index in b of the char after the backslash, 0 if none
    if(d)
    {
        //c is advanced first, then the code letter at c + 1 is looked up:
        //u0 U1 h2 t3 s4 "5 \6 n7, or -1 when it is not a known code
        integer e = llSubStringIndex("uUhts\"\\n", llGetSubString(a, -~(c += d), -~c));
        if(2 < e)
        {
            //Simple one-for-one codes. The lookup string is laid out so that
            //indexes 0-3 are the tab (four spaces), 4 is the space, 5 the quote,
            //6 the backslash and 7 the new line; it must start with FIVE spaces.
            a = llInsertString(llDeleteSubString(a,c, -~c), c, llGetSubString("     \"\\\n", e * (e != 3), e));
        }
        else if(e == 2)
        {
            //e = number of hex digits requested (two per byte)
            e = (integer)("0x"+llGetSubString(a,c+2,c+2)) << 1;
            f = llStringLength(b);
            //Compare in b's coordinates (d, not c): c is an index into a, while
            //f is the length of the tail only.
            if(d + e + 1 >= f)
                e = (f + ~d) & -2;//string ends early: use what is left, rounded up to a pair
            f = e;
            //Always reset the buffer, otherwise a zero-length request would
            //insert the whole unescaped tail.
            b = "";
            if(f)
            {
                //build "%xx%xx..." from the last pair back to the first
                do
                    b = "%"+llGetSubString(a,c - ~e,c + e + 2) + b;
                while((e-=2) > 0);
            }
            a = llInsertString(llDeleteSubString(a,c, c + f + 2),c, b = llUnescapeURL(b));
            c = c + ~-llStringLength(b);//add to c so we don't accidentally unescape result
            //NOTE(review): when b is empty and the escape sat at index 0, c
            //becomes -1 and the tail computed below is emptied - confirm intent.
        }
        else if(~e)// \uXXXX or \UXXXXXXXX
        {
            //e becomes the digit count: 4 for \u, 8 for \U
            a = llDeleteSubString(a, c, c - ~(e = 4 << e));
            if(0 < e = (integer)("0x" + llGetSubString(b, -~d, d + e)))
            {
                //f (assigned in the right-most operand, which is evaluated first)
                //is the number of UTF-8 continuation bytes needed for code point e
                b = "%" + byte2hex((e >> (6 * f)) | ((0x7F80 >> f) <<
                    !(f = ((e >= 0x80) + (e >= 0x800) + (e >= 0x10000) + (e >= 0x200000) + (e >= 0x4000000)))));
                //continuation bytes: 10xxxxxx, six bits each
                while(f)
                    b += "%" + byte2hex((((e >> (6 * (f=~-f))) | 0x80) & 0xBF));
                a = llInsertString(a, c, llUnescapeURL(b));
            }
            //NOTE(review): when nothing is inserted (value <= 0) the character
            //now at index c is skipped by the scan below - confirm intent.
        }
        //continue scanning after the character at index c
        b = llDeleteSubString(a,0,c);
        jump loop;
    }
    return a;
}
string UnescapeSlow(string a)
{
    //Please visit the website for instructions about this function (and view its talk page for the author's notes)
    //http://rpgstats.com/wiki/index.php?title=FunctionUnescape
    //
    //List based variant: a is split into tokens with every escape code kept as
    //its own list element, then the tokens are walked front to back using
    //negative indexes and the result is accumulated in f.
    //
    //index into b:  0    1    2    3    4    5    6    7
    //code:          \\   \n   \"   \t   \s   \h   \u   \U
    list b = llCSV2List("\\\\,\\n,\\\",\\t,\\s,\\h,\\u,\\U");//"//wiki fix; slightly less memory than a statically allocated list
    list c = llParseString2List(a,[a = ""],b);//totally wrong way to wipe "a" but it cleans up the stack nicely
    integer d = ([] != c);//list comparison yields the length difference: d = -(number of tokens)
    string f;//output accumulator
    integer g;//index of the current token in b (-1 = plain text); reused as scratch below
    integer e;//number of leading characters of the next token consumed by the current escape
    while(d & 0x80000000)//while d is negative, i.e. tokens remain
    {
        //Reset per token: the advance at the bottom of the loop keys off e, and
        //a value left over from an earlier \u, \U or \h would make it skip the
        //token that follows a simple escape or plain text.
        e = 0;
        if(~g = llListFindList(b, llList2List(c,d,d)))
        {
            if(g < 5)
            {
                //Simple codes. "\t" in an LSL literal is four spaces, so the lookup is
                //backslash, new line, quote, then spaces: g == 3 reads from index 3 to
                //the end (the tab), g == 4 reads the single space at index 4.
                f += llGetSubString("\\\n\"\t",g,g - ((g == 3) << 2));
            }
            else
            {
                e = ((g > 5) << (g == 7)) << 2;//0 for \h, 4 for \u, 8 for \U
                string h = llList2String(c, -~d);//token following the escape code
                integer i = llStringLength(h);
                @funk;
                if(d < -2 && e > i)//no point if there is only 1 entry left.
                {//malformed string handler: the payload token is shorter than needed.
                    //Rejoin everything after the current token, take its first e
                    //characters as the payload and re-tokenise the remainder.
                    i = llStringLength(h = llDeleteSubString(a = (string)llDeleteSubList(c, 0, d), e, -1));
                    //d is set two short of the new list so the advance at the
                    //bottom of the loop lands on its first element
                    d = -2 - ((c = llParseString2List(llGetSubString(a, e, -1),[a = ""],b)) != []);
                }
                if(g == 5)
                {
                    if((g = (integer)("0x"+llGetSubString(h,0,0))))
                    {//no point if it is zero
                        e = -~(g << 1);//count digit plus two hex digits per byte
                        g = 8;//marks "\h with a known length" for the branch below
                        jump funk;//we adjust e to reflect the dynamic nature of the value.
                        //then we can parse it statically, well almost.
                    }
                    else if(~llListFindList(b, [h]))//catch for "\\\\"
                    {//oh you are evil, trying to break the script, we won't have any of that
                        //the next token is itself an escape code: drop the first character
                        //of the rejoined remainder and re-tokenise it
                        d = ~((c = llParseString2List(llDeleteSubString((string)llDeleteSubList(c, 0, d), 0, 0),[],b)) != []);
                    }
                    else//remove the required character and clean-up the list.
                    {//Since we are using negative indexes, we don't need to recalc them
                        c = llDeleteSubString(h,0,0) + llDeleteSubList(c, 0, -~d);
                    }
                }
                else
                {
                    if(g == 8)
                    {//adds extra characters.
                        if(i & 0xFFFffFFE)//payload is at least two characters long
                        {
                            //take the hex digits after the count digit and put a "%"
                            //in front of every pair, working from the back
                            a = llGetSubString(h, 1, i = (e & -2));
                            do
                                a = llInsertString(a,i-=2,"%");
                            while(i);
                        }
                        //NOTE(review): with a shorter payload a keeps whatever value
                        //it had before - confirm intent.
                    }
                    else //if(g == 6 || g == 7)
                    {
                        a = "";
                        //i = code point parsed from the first e characters of h
                        if(0 < i = (integer)("0x"+llDeleteSubString(h, e, 0x7FFFFFFE)))
                        {
                            //g = number of UTF-8 continuation bytes needed for i
                            if (i >= 0x4000000)// || i < 0)//if only unicode supported negatives...
                                g = 5;
                            else if (i >= 0x200000)
                                g = 4;
                            else if (i >= 0x10000)
                                g = 3;
                            else if (i >= 0x800)
                                g = 2;
                            else
                                g = (i >= 0x80);
                            //lead byte: length marker bits plus the top bits of i
                            a = "%" + byte2hex((i >> (6 * g)) | ((0x7F80 >> g) << !g));
                            //continuation bytes: 10xxxxxx, six bits each
                            while(g)
                                a += "%" + byte2hex((((i >> (6 * (g=~-g))) | 0x80) & 0xBF));
                        }
                    }
                    //decoded bytes followed by whatever is left of the payload token
                    f += llUnescapeURL(a) + llDeleteSubString(h,0,~-e);
                }
            }
        }
        else
            f += llList2String(c,d);//plain text token
        //advance by two tokens when a payload token was consumed, otherwise by one
        d += 2 >> !e;
    }
    return f;
}
Testbed: