I came accross a strange javascript behavior today which is probably due to some character encoding issue. The length function returns two different character count for what is apparently the exact same string. In one instance the string was copy pasted from a database value, in the second instance I manually wrote the characters with my keyboard. I'm sure this is UTF related but I cant figure how to get the "correct" character count. Is there a way to know which encoding the faulty string is in and "fix" it somehow? Is there a way to force every strings in my app to be UTF-8 ? Is there a hidden character somewhere ?
Thanks for your help
var utils = {
/**
* cleans up our url before db insertion
*
* @param url
* @returns {String} the cleaned url
*/
cleanUrl : function(url){
url = url.trim().toLowerCase();
if(url.includes('?'))return url;
var lastChar = url.charAt(url.length-1);
console.log('lastchar = ' + lastChar);
if(lastChar == '/'){
url=url.substring(0, url.length-1);
}
return url;
},
doTest : function(){
var url = "https://bitcointalk.org/"; //this string was taken from DB
console.log('url length ' + url.length);
console.log('url length ' + url.trim().length);
var cleaned = this.cleanUrl(url);
console.log('cleaned length ' + cleaned.length);
console.log('cleaned ' + cleaned);
console.log('------------------------------');
var url2 = "https://bitcointalk.org/"; //this string was manually written
console.log('url2 length ' + url2.length);
console.log('url2 length ' + url2.trim().length);
var cleaned2 = this.cleanUrl(url2);
console.log('cleaned2 length ' + cleaned2.length);
console.log('cleaned2 ' + cleaned2);
}
};
utils.doTest()
And here is the output :
url length 25
url length 25
lastchar =
cleaned length 25
cleaned https://bitcointalk.org/
------------------------------
url2 length 24
url2 length 24
lastchar = /
cleaned2 length 23
cleaned2 https://bitcointalk.org
You are correct! There is a secret character encoded from the DB you can see if you copy both of your strings out and try it in your browser console.
I have tested your string which is copied from DB and it contains some special characters. So for that you can use encodeURIComponent()
method of javascript on that string and then save that encoded string in DB and while retrieving perform decodeURIComponent()
on that string.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With