Convert special characters to HTML in JavaScript

Javascript

Javascript Problem Overview


How can I convert special characters to HTML in JavaScript?

Example:

  • & (ampersand) becomes &amp;.
  • " (double quote) becomes &quot; when ENT_NOQUOTES is not set.
  • ' (single quote) becomes &#039; only when ENT_QUOTES is set.
  • < (less than) becomes &lt;.
  • > (greater than) becomes &gt;.

Javascript Solutions


Solution 1 - Javascript

The best way, in my opinion, is to use the browser's built-in HTML escape functionality to handle many of the cases. To do this, simply create an element in the DOM tree and set the innerText of the element to your string. Then retrieve the innerHTML of the element. The browser will return an HTML-encoded string.

/**
 * HTML-encodes a string by letting the browser serialise it: the text is
 * assigned to a throw-away <div> and read back through `innerHTML`.
 *
 * As the sample output on this page shows, `&`, `<` and `>` come back as
 * entities while single and double quotes are returned unchanged.
 *
 * @param {string} s Text to encode.
 * @return {string} The `innerHTML` serialisation of that text.
 */
function HtmlEncode(s) {
  const scratch = document.createElement("div");
  // Same order as the chained assignment: textContent first, then innerText.
  scratch.textContent = s;
  scratch.innerText = s;
  return scratch.innerHTML;
}

Test run:

alert(HtmlEncode('&;\'><"'));

Output:

&amp;;'&gt;&lt;"

This method of escaping HTML is also used by the Prototype JS library though differently from the simplistic sample I have given.

Note: You will still need to escape quotes (double and single) yourself. You can use any of the methods outlined by others here.

Solution 2 - Javascript

You need a function that does something like

return mystring.replace(/&/g, "&amp;").replace(/>/g, "&gt;").replace(/</g, "&lt;").replace(/"/g, "&quot;");

But taking into account your desire for different handling of single/double quotes.

Solution 3 - Javascript

This generic function encodes every nonalphabetic character to its HTML code (numeric character reference (NCR)):

/**
 * Encodes every character that is not an ASCII letter (A-Z, a-z) as a decimal
 * numeric character reference such as `&#38;`.
 *
 * The string is walked by Unicode code point, so a character outside the
 * Basic Multilingual Plane becomes one valid reference instead of two
 * references to its surrogate halves.
 *
 * @param {string} str Text to encode.
 * @return {string} The text with every non-letter replaced by `&#N;`.
 */
function HTMLEncode(str) {
    const aRet = [];

    // for...of iterates strings by code point, not by UTF-16 code unit.
    for (const ch of str) {
        const iC = ch.codePointAt(0);
        const isAsciiLetter = (iC >= 65 && iC <= 90) || (iC >= 97 && iC <= 122);
        aRet.push(isAsciiLetter ? ch : '&#' + iC + ';');
    }
    return aRet.join('');
}

[edit 2022] More modern approach:

/**
 * Converts every character of `str` to a decimal numeric character reference.
 *
 * @param {string} str Text to convert.
 * @param {boolean} [showInHtml=false] When true the leading ampersand is itself
 *   escaped (`&amp;#N;`), so the reference is displayed literally when the
 *   result is inserted as HTML.
 * @return {string} The concatenated references.
 */
const toHtmlEntities = (str, showInHtml = false) => {
  const prefix = showInHtml ? `&amp;#` : `&#`;
  // Array.from iterates by code point; codePointAt(0) then yields the full
  // value for astral characters (charCodeAt would return only the high surrogate).
  return Array.from(str, (ch) => `${prefix}${ch.codePointAt(0)};`).join(``);
};
// Sample input containing an ampersand and two accented letters.
const str = `&Hellõ Wórld`;

// Append a list to the page: the first item inserts the escaped form
// (showInHtml = true), the second inserts the raw references as HTML, and the
// third is left empty so it can be filled through textContent below.
document.body.insertAdjacentHTML(`beforeend`, `<ul>
  <li>Show the entities (<code>toHtmlEntities(str, true)</code>): <b>${
    toHtmlEntities(str, true)}</b></li>
  <li>Let the browser decide (<code>toHtmlEntities(str)</code>): <b>${
    toHtmlEntities(str)}</b></li>
  <li id="textOnly"></li></ul>`);
// Assign the same references through textContent, i.e. as plain text rather than markup.
document.querySelector(`#textOnly`).textContent = `As textContent: ${
  toHtmlEntities(str)}`;

/* Base typography and outer margin for the page. */
body {
  font: 14px / 18px "normal verdana", arial;
  margin: 1rem;
}

/* Light grey background behind <code> elements. */
code {
  background-color: #eee;
}

Solution 4 - Javascript

For those who want to decode an integer char code like &#xxx; inside a string, use this function:

/**
 * Replaces decimal numeric character references (`&#NNN;`) in a string with
 * the characters they denote.
 *
 * Uses String.fromCodePoint so references above 0xFFFF decode to the right
 * character (String.fromCharCode would truncate them to 16 bits). References
 * beyond the Unicode range (> 0x10FFFF) are left untouched.
 *
 * @param {string} str Text that may contain `&#NNN;` references.
 * @return {string} The text with those references decoded.
 */
function decodeHtmlCharCodes(str) {
  return str.replace(/&#(\d+);/g, function(match, charCode) {
    var codePoint = Number(charCode);
    // fromCodePoint throws a RangeError past 0x10FFFF, so keep the original text.
    return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;
  });
}

// Will output "The show that gained int’l reputation’!"
console.log(decodeHtmlCharCodes('The show that gained int&#8217;l reputation&#8217;!'));

ES6

/**
 * Replaces decimal numeric character references (`&#NNN;`) in a string with
 * the characters they denote.
 *
 * Uses String.fromCodePoint so references above 0xFFFF decode to the right
 * character. References beyond the Unicode range (> 0x10FFFF) are left untouched.
 *
 * @param {string} str Text that may contain `&#NNN;` references.
 * @return {string} The text with those references decoded.
 */
const decodeHtmlCharCodes = str =>
  str.replace(/&#(\d+);/g, (match, charCode) => {
    const codePoint = Number(charCode);
    // fromCodePoint throws a RangeError past 0x10FFFF, so keep the original text.
    return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;
  });

// Will output "The show that gained int’l reputation’!"
console.log(decodeHtmlCharCodes('The show that gained int&#8217;l reputation&#8217;!'));

Solution 5 - Javascript

Create a function that uses string replace

/**
 * Escapes the five HTML-significant characters (& > < " ') in a string.
 *
 * @param {string} str Text to escape.
 * @return {string} The escaped text.
 */
function convert(str) {
  // The ampersand rule stays first so the entities produced by the later
  // rules are not escaped a second time.
  const rules = [
    [/&/g, "&amp;"],
    [/>/g, "&gt;"],
    [/</g, "&lt;"],
    [/"/g, "&quot;"],
    [/'/g, "&#039;"],
  ];
  return rules.reduce(
    (text, [pattern, entity]) => text.replace(pattern, entity),
    str
  );
}

Solution 6 - Javascript

From Mozilla ...

> Note that charCodeAt will always return a value that is less than 65,536. This is because the higher code points are represented by a pair of (lower valued) "surrogate" pseudo-characters which are used to comprise the real character. Because of this, in order to examine or reproduce the full character for individual characters of value 65,536 and above, for such characters, it is necessary to retrieve not only charCodeAt(i), but also charCodeAt(i+1) (as if examining/reproducing a string with two letters).

The Best Solution

/**
 * (c) 2012 Steven Levithan <http://slevithan.com/>
 * MIT license
 */
// Fallback for String.prototype.codePointAt, installed only when the engine
// does not already provide one.
if (!String.prototype.codePointAt) {
    /**
     * Returns the code point that starts at UTF-16 index `pos`.
     * @param {number} pos Index into the string; a value for which isNaN() is
     *   true (including a missing argument) is treated as 0.
     * @return {number} The combined code point when `pos` is a high surrogate
     *   followed by a low surrogate; otherwise whatever charCodeAt(pos) returns.
     */
    String.prototype.codePointAt = function (pos) {
        pos = isNaN(pos) ? 0 : pos;
        // Read the unit at pos and the one after it so a pair can be detected.
        var str = String(this),
            code = str.charCodeAt(pos),
            next = str.charCodeAt(pos + 1);
        // If a surrogate pair
        if (0xD800 <= code && code <= 0xDBFF && 0xDC00 <= next && next <= 0xDFFF) {
            // Each surrogate contributes 10 bits; the pair is offset by 0x10000.
            return ((code - 0xD800) * 0x400) + (next - 0xDC00) + 0x10000;
        }
        return code;
    };
}

/**
 * Encodes every non-ASCII character (code point above 127) as a decimal
 * numeric character reference. ASCII characters, including `&`, `<`, `>` and
 * quotes, are copied through unchanged.
 *
 * @param {string} string Text to encode.
 * @return {string} The text with non-ASCII characters replaced by `&#N;`.
 */
function html_encode(string) {
    var ret_val = '';
    for (var i = 0; i < string.length; i++) {
        var code = string.codePointAt(i);
        if (code > 127) {
            ret_val += '&#' + code + ';';
            // A code point above 0xFFFF occupies two UTF-16 units; step over
            // the low surrogate so it is not emitted as a second, bogus reference.
            if (code > 0xFFFF) {
                i++;
            }
        } else {
            ret_val += string.charAt(i);
        }
    }
    return ret_val;
}

Usage example:

html_encode("✈");

Solution 7 - Javascript

As was mentioned by dragon the cleanest way to do it is with jQuery:

/**
 * HTML-encodes text by assigning it as the text of a detached jQuery <div>
 * and reading the element's markup back.
 *
 * @param {string} s Text to encode.
 * @return {string} The markup jQuery reports for that text.
 */
function htmlEncode(s) {
    var scratch = $('<div>');
    scratch.text(s);
    return scratch.html();
}

/**
 * Decodes HTML by assigning it as the markup of a detached jQuery <div> and
 * reading the element's text back.
 *
 * NOTE(review): `s` is parsed as HTML; confirm callers never pass untrusted
 * input, since markup in it would be instantiated by the parser.
 *
 * @param {string} s HTML to decode.
 * @return {string} The text content jQuery reports for that markup.
 */
function htmlDecode(s) {
    var scratch = $('<div>');
    scratch.html(s);
    return scratch.text();
}

Solution 8 - Javascript

/**
 * Replaces selected symbols and accented characters in the `value` of every
 * argument with an HTML character reference, updating each object in place.
 *
 * Every occurrence is replaced in a single pass. Where a character is listed
 * more than once in the table, the first listed reference is used. Arguments
 * that are null/undefined or have no string `value` are skipped.
 *
 * Each argument is expected to be an object with a string `value` property
 * (for example a form field).
 */
function char_convert() {

    // Parallel tables: chars[n] is replaced by codes[n].
    var chars = [
        "©","Û","®","ž","Ü","Ÿ","Ý","$","Þ","%","¡","ß","¢","à","£","á",
        "À","¤","â","Á","¥","ã","Â","¦","ä","Ã","§","å","Ä","¨","æ","Å",
        "©","ç","Æ","ª","è","Ç","«","é","È","¬","ê","É","\u00AD","ë","Ê","®",
        "ì","Ë","¯","í","Ì","°","î","Í","±","ï","Î","²","ð","Ï","³","ñ",
        "Ð","´","ò","Ñ","µ","ó","Õ","¶","ô","Ö","·","õ","Ø","¸","ö","Ù",
        "¹","÷","Ú","º","ø","Û","»","ù","Ü","@","¼","ú","Ý","½","û","Þ",
        "€","¾","ü","ß","¿","ý","à","‚","À","þ","á","ƒ","Á","ÿ","å","„",
        "Â","æ","…","Ã","ç","†","Ä","è","‡","Å","é","ˆ","Æ","ê","‰","Ç",
        "ë","Š","È","ì","‹","É","í","Œ","Ê","î","Ë","ï","Ž","Ì","ð","Í",
        "ñ","Î","ò","‘","Ï","ó","’","Ð","ô","“","Ñ","õ","”","Ò","ö","•",
        "Ó","ø","–","Ô","ù","—","Õ","ú","˜","Ö","û","™","×","ý","š","Ø",
        "þ","›","Ù","ÿ","œ","Ú"
    ];
    var codes = [
        "&copy;","&#219;","&reg;","&#158;","&#220;","&#159;","&#221;","&#36;","&#222;","&#37;","&#161;","&#223;","&#162;","&#224;","&#163;","&#225;",
        "&Agrave;","&#164;","&#226;","&Aacute;","&#165;","&#227;","&Acirc;","&#166;","&#228;","&Atilde;","&#167;","&#229;","&Auml;","&#168;","&#230;","&Aring;",
        "&#169;","&#231;","&AElig;","&#170;","&#232;","&Ccedil;","&#171;","&#233;","&Egrave;","&#172;","&#234;","&Eacute;","&#173;","&#235;","&Ecirc;","&#174;",
        "&#236;","&Euml;","&#175;","&#237;","&Igrave;","&#176;","&#238;","&Iacute;","&#177;","&#239;","&Icirc;","&#178;","&#240;","&Iuml;","&#179;","&#241;",
        "&ETH;","&#180;","&#242;","&Ntilde;","&#181;","&#243;","&Otilde;","&#182;","&#244;","&Ouml;","&#183;","&#245;","&Oslash;","&#184;","&#246;","&Ugrave;",
        "&#185;","&#247;","&Uacute;","&#186;","&#248;","&Ucirc;","&#187;","&#249;","&Uuml;","&#64;","&#188;","&#250;","&Yacute;","&#189;","&#251;","&THORN;",
        "&#128;","&#190;","&#252;","&szlig;","&#191;","&#253;","&agrave;","&#130;","&#192;","&#254;","&aacute;","&#131;","&#193;","&#255;","&aring;","&#132;",
        "&#194;","&aelig;","&#133;","&#195;","&ccedil;","&#134;","&#196;","&egrave;","&#135;","&#197;","&eacute;","&#136;","&#198;","&ecirc;","&#137;","&#199;",
        "&euml;","&#138;","&#200;","&igrave;","&#139;","&#201;","&iacute;","&#140;","&#202;","&icirc;","&#203;","&iuml;","&#142;","&#204;","&eth;","&#205;",
        "&ntilde;","&#206;","&ograve;","&#145;","&#207;","&oacute;","&#146;","&#208;","&ocirc;","&#147;","&#209;","&otilde;","&#148;","&#210;","&ouml;","&#149;",
        "&#211;","&oslash;","&#150;","&#212;","&ugrave;","&#151;","&#213;","&uacute;","&#152;","&#214;","&ucirc;","&#153;","&#215;","&yacute;","&#154;","&#216;",
        "&thorn;","&#155;","&#217;","&yuml;","&#156;","&#218;"
    ];

    // Build a character -> reference lookup (first listing wins) and a single
    // character class matching every listed character.
    var lookup = {};
    var classBody = '';
    for (var x = 0; x < chars.length; x++) {
        if (!Object.prototype.hasOwnProperty.call(lookup, chars[x])) {
            lookup[chars[x]] = codes[x];
            // Escape the few characters that are special inside [...].
            classBody += chars[x].replace(/[\\\]^-]/g, '\\$&');
        }
    }
    var matcher = new RegExp('[' + classBody + ']', 'g');

    for (var i = 0; i < arguments.length; i++) {
        var field = arguments[i];
        if (field == null || typeof field.value !== 'string') {
            continue;
        }
        // A global regex replaces every occurrence; the previous string
        // pattern only ever replaced the first one.
        field.value = field.value.replace(matcher, function (ch) {
            return lookup[ch];
        });
    }
}

char_convert(this);

Solution 9 - Javascript

/**
 * Escapes the characters < > & " ' and # in a string as HTML entities.
 *
 * @param {string} str Text to escape.
 * @return {string} The escaped text.
 */
function ConvChar(str) {
    // Keys are the raw characters and values are their entities. The listing
    // had been HTML-escaped one extra time, which broke both the table and the regex.
    var c = {'<':'&lt;', '>':'&gt;', '&':'&amp;',
             '"':'&quot;', "'":'&#039;', '#':'&#035;' };

    return str.replace(/[<&>'"#]/g, function(s) { return c[s]; });
}

// Demo input uses the raw characters (the listing had escaped the "<" signs).
alert(ConvChar('<-"-&-"->-<-\'-#-\'->'));

Result:

&lt;-&quot;-&amp;-&quot;-&gt;-&lt;-&#039;-&#035;-&#039;-&gt;

In a textarea tag:

<-"-&-"->-<-'-#-'->

If you'll just change a few characters in long code...

Solution 10 - Javascript

If you need support for all standardized named character references, Unicode and ambiguous ampersands, the he library is the only 100% reliable solution I'm aware of!


Example use
he.encode('foo © bar ≠ baz 𝌆 qux');
// Output: 'foo &#xA9; bar &#x2260; baz &#x1D306; qux'

he.decode('foo &copy; bar &ne; baz &#x1D306; qux');
// Output: 'foo © bar ≠ baz 𝌆 qux'

Solution 11 - Javascript

In a PRE tag - and in most other HTML tags - plain text for a batch file that uses the output redirection characters (< and >) will break the HTML, but here is my tip: anything goes in a TEXTAREA element - it will not break the HTML, mainly because we are inside a control instanced and handled by the OS, and therefore its content are not being parsed by the HTML engine.

As an example, say I want to highlight the syntax of my batch file using JavaScript. I simply paste the code in a textarea without worrying about the HTML reserved characters, and have the script process the innerHTML property of the textarea, which evaluates to the text with the HTML reserved characters replaced by their corresponding ISO 8859-1 entities.

Browsers will escape special characters automatically when you retrieve the innerHTML (and outerHTML) property of an element. Using a textarea (and who knows, maybe an input of type text) just saves you from doing the conversion (manually or through code).

I use this trick to test my syntax highlighter, and when I'm done authoring and testing, I simply hide the textarea from view.

Solution 12 - Javascript

A workaround:

// "<div>" (with angle brackets) creates a new detached element; the bare
// selector "div" would select every <div> already on the page and overwrite its text.
var temp = $("<div>").text("<");
var afterEscape = temp.html(); // afterEscape == "&lt;"

Solution 13 - Javascript

Use:

// Parallel tables: the character with code swapCodes[n] is replaced by swapStrings[n].
var swapCodes   = [8211, 8212, 8216, 8217, 8220, 8221, 8226, 8230, 8482, 169, 61558, 8226, 61607];
var swapStrings = ["--", "--", "'",  "'",  '"',  '"',  "*",  "...", "&trade;", "&copy;", "&bull;", "&bull;", "&bull;"];

var TextCheck = {
    /**
     * Attaches handlers to `div` that run cleanWord on it.
     * NOTE(review): "bind" is registered as an event name alongside "focus"
     * and "paste"; confirm which event was intended.
     * @param {*} div Anything accepted by `$()` (selector or element).
     */
    doCWBind:function(div){
        $(div).bind({
            bind:function(){
                TextCheck.cleanWord(div);
            },
            focus:function(){
                TextCheck.cleanWord(div);
            },
            paste:function(){
                TextCheck.cleanWord(div);
            }
        });
    },
    /**
     * Reads the field's value, replaces every character listed in swapCodes
     * with the matching swapStrings entry, and writes the result back.
     * @param {*} div Anything accepted by `$()` (selector or element).
     */
    cleanWord:function(div){
        var output = $(div).val();
        for (var i = 0; i < swapCodes.length; i++) {
            // Match the literal character. Building "\\u" + hex produced an
            // invalid escape for codes under 0x1000 (169 became "\ua9"), so
            // those entries never matched their character.
            var target = String.fromCharCode(swapCodes[i]);
            output = output.split(target).join(swapStrings[i]);
        }
        $(div).val(output);
    }
};

Another one that we use now that works. The one above I have it calling a script instead and returns the converted code. It is only good on small textareas (meaning not a full on article, blog, etc.)


For the above. It works on most characters.

// Extended replacement tables: the character with code swapCodes[n] maps to
// swapStrings[n]. Covers Word-style punctuation, the range 161-255 and a few
// extra Latin letters.
var swapCodes = [
    8211, 8212, 8216, 8217, 8220, 8221, 8226, 8230, 8482, 61558, 8226, 61607,
    161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176,
    177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
    338, 339, 352, 353, 376, 402
];
var swapStrings = [
    "--", "--", "'", "'", '"', '"', "*", "...", "&trade;", "&bull;", "&bull;", "&bull;",
    "&iexcl;", "&cent;", "&pound;", "&curren;", "&yen;", "&brvbar;", "&sect;", "&uml;", "&copy;", "&ordf;", "&laquo;", "&not;", "&shy;", "&reg;", "&macr;", "&deg;",
    "&plusmn;", "&sup2;", "&sup3;", "&acute;", "&micro;", "&para;", "&middot;", "&cedil;", "&sup1;", "&ordm;", "&raquo;", "&frac14;", "&frac12;", "&frac34;", "&iquest;",
    "&Agrave;", "&Aacute;", "&Acirc;", "&Atilde;", "&Auml;", "&Aring;", "&AElig;", "&Ccedil;", "&Egrave;", "&Eacute;", "&Ecirc;", "&Euml;", "&Igrave;", "&Iacute;", "&Icirc;", "&Iuml;",
    "&ETH;", "&Ntilde;", "&Ograve;", "&Oacute;", "&Ocirc;", "&Otilde;", "&Ouml;", "&times;", "&Oslash;", "&Ugrave;", "&Uacute;", "&Ucirc;", "&Uuml;", "&Yacute;", "&THORN;", "&szlig;",
    "&agrave;", "&aacute;", "&acirc;", "&atilde;", "&auml;", "&aring;", "&aelig;", "&ccedil;", "&egrave;", "&eacute;", "&ecirc;", "&euml;", "&igrave;", "&iacute;", "&icirc;", "&iuml;",
    "&eth;", "&ntilde;", "&ograve;", "&oacute;", "&ocirc;", "&otilde;", "&ouml;", "&divide;", "&oslash;", "&ugrave;", "&uacute;", "&ucirc;", "&uuml;", "&yacute;", "&thorn;", "&yuml;",
    "&#338;", "&#339;", "&#352;", "&#353;", "&#376;", "&#402;"
];

I create a javascript file that has a lot of functionality including the above. http://www.neotropicsolutions.com/JSChars.zip

All files needed are included. I added jQuery 1.4.4. Simply because I saw issues in other versions, yet to try them out.

Requires: jQuery & jQuery Impromptu from: http://trentrichardson.com/Impromptu/index.php

1. Word Count
2. Character Conversion
3. Checks to ensure this is not passed: "notsomeverylongstringmissingspaces"
4. Checks to make sure ALL IS NOT ALL UPPERCASE.
5. Strip HTML

    // Word Counter
    $.getScript('js/characters.js', function(){
        $('#adtxt').bind("keyup click blur focus change paste",
            function(event){
                TextCheck.wordCount(30, "#adtxt", "#adtxt_count", event);
        });
        $('#adtxt').blur(
            function(event){
                TextCheck.check_length('#adtxt'); // unsures properly spaces-not one long word
                TextCheck.doCWBind('#adtxt'); // char conversion
        });

        TextCheck.wordCount(30, "#adtxt", "#adtxt_count", false);
    });

    //HTML
    <textarea name="adtxt" id="adtxt" rows="10" cols="70" class="wordCount"></textarea>

    <div id="adtxt_count" class="clear"></div>

    // Just Character Conversions:
    TextCheck.doCWBind('#myfield');

    // Run through form fields in a form for case checking.
    // Alerts user when field is blur'd.
    var labels = new Array("Brief Description", "Website URL", "Contact Name", "Website", "Email", "Linkback URL");
    var checking = new Array("descr", "title", "fname", "website", "email", "linkback");
    TextCheck.check_it(checking, labels);

    // Extra security to check again, make sure form is not submitted
    var pass = TextCheck.validate(checking, labels);
    if(pass){
        // Do form actions
    }

    //Strip HTML
    <textarea name="adtxt" id="adtxt" rows="10" cols="70" onblur="TextCheck.stripHTML(this);"></textarea>

Solution 14 - Javascript

<!doctype html>
<html lang="en">
    <head>
        <meta charset="utf-8">
        <title>html</title>

        <script>
            // Wait for the DOM with the native event: this page never loads
            // jQuery, so the `$` ready shorthand would throw a ReferenceError.
            document.addEventListener('DOMContentLoaded', function() {
                // The "&amp;" entity is parsed by innerHTML and shown as "&".
                document.getElementById('test').innerHTML = "&amp;";
            });
        </script>
    </head>

    <body>
        <div id="test"></div>
    </body>
</html>

You can simply convert special characters to HTML using the above code.

Solution 15 - Javascript

Here's a good library I've found very useful in this context.

https://github.com/mathiasbynens/he

According to its author:

> It supports all standardized named character references as per HTML, handles ambiguous ampersands and other edge cases just like a browser would, has an extensive test suite, and — contrary to many other JavaScript solutions — he handles astral Unicode symbols just fine.

Solution 16 - Javascript

/**
 * Replaces each of < > & " ' with its decimal numeric character reference.
 *
 * @param {string} text Text to escape.
 * @return {string} The escaped text.
 */
function escape(text) {
  const toNumericReference = (ch) => `&#${ch.charCodeAt(0)};`;
  return text.replace(/[<>\&\"\']/g, (ch) => toNumericReference(ch));
}

alert(escape("<>&'\""));

Solution 17 - Javascript

This doesn't directly answer your question, but if you are using innerHTML in order to write text within an element and you ran into encoding issues, just use textContent, i.e.:

var s = "Foo 'bar' baz <qux>";

var element = document.getElementById('foo');
element.textContent = s;

// <div id="foo">Foo 'bar' baz <qux></div>

Solution 18 - Javascript

Here are a couple methods I use without the need of jQuery:

You can encode every character in your string:

/**
 * Encodes every character of a string as a decimal numeric character reference.
 *
 * The `u` flag makes the regex match whole code points, so a character outside
 * the Basic Multilingual Plane yields one valid reference rather than two
 * surrogate halves.
 *
 * @param {string} e Text to encode.
 * @return {string} The concatenated `&#N;` references.
 */
function encode(e) {
    return e.replace(/[^]/gu, function(ch) {
        return "&#" + ch.codePointAt(0) + ";";
    });
}

Or just target the main safe encoding characters to worry about (&, linebreaks, <, >, " and ') like:

/**
 * Encodes only &, newline, <, >, ' and " as decimal numeric character
 * references; every other character is left as is.
 *
 * @param {string} r Text to encode.
 * @return {string} The encoded text.
 */
function encode(r){
    const asNumericReference = (ch) => "&#" + ch.charCodeAt(0) + ";";
    return r.replace(/[\x26\x0A\<>'"]/g, (ch) => asNumericReference(ch));
}

test.value = encode('How to encode\nonly html tags &<>\'" nice & fast!');

/*************
* \x26 is & (ampersand); its position inside the character class does not
*      matter, because every match is replaced in a single pass,
* \x0A is newline,
*************/

<textarea id=test rows="9" cols="55">www.WHAK.com</textarea>

Solution 19 - Javascript

We can use JavaScript's DOMParser for special characters conversion.

// Parse as "text/html": only an HTML document has a `body`, and the HTML
// parser accepts these semicolon-less references, which XML would reject.
const parser = new DOMParser();
const convertedValue = parser.parseFromString("&#039 &amp &#039 &lt &gt", "text/html").body.textContent;

Solution 20 - Javascript

The following is a function to encode XML escaped characters in JavaScript:

Encoder.htmlEncode(unsafeText);

Solution 21 - Javascript

If you're using Lodash, you can do (copy pasted from documentation):

_.escape('fred, barney, & pebbles');
// => 'fred, barney, &amp; pebbles'

For more information: _.escape([string=''])

Solution 22 - Javascript

I had struggled with this myself for quite some time, but I settled on using this negative match regex to match all special characters and convert them to their relevant character codes:

// Replace everything except ASCII letters and digits with a decimal numeric
// character reference. The `u` flag plus codePointAt keep characters outside
// the Basic Multilingual Plane as one reference instead of two surrogate halves.
var encoded = value.replace(/[^A-Za-z0-9]/gu, function(i) {
	return '&#' + i.codePointAt(0) + ';';
});

Solution 23 - Javascript

<html>
    <body>
        <script type="text/javascript">
            // Show the sample string before and after escaping the five
            // HTML-significant characters.
            var str = "&\"'<>";
            alert('B4 Change: \n' + str);

            // The ampersand is handled first so the entities added by the
            // later steps are not escaped again.
            str = str
                .replace(/\&/g, '&amp;')
                .replace(/</g,  '&lt;')
                .replace(/>/g,  '&gt;')
                .replace(/\"/g, '&quot;')
                .replace(/\'/g, '&#039;');

            alert('After change: \n' + str);
        </script>
    </body>
</html>

Use this to test: http://www.w3schools.com/js/tryit.asp?filename=tryjs_text

Solution 24 - Javascript

Yes, but if you need to insert the resulting string somewhere without it being converted back, you need to do:

// Escape the ampersand of the entity itself so the inserted text shows "&#39;"
// literally instead of being decoded back to an apostrophe.
str.replace(/'/g,"&amp;#39;"); // and so on

Solution 25 - Javascript

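Use the JavaScript function escape(), which lets you encode strings. Note that escape() is deprecated and produces percent-encoding (e.g. %3C for <), not HTML entities, so it does not by itself convert special characters to HTML.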

E.g.,

escape("yourString");

Attributions

All content for this solution is sourced from the original question on Stackoverflow.

The content on this page is licensed under the Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.

Content TypeOriginal AuthorOriginal Content on Stackoverflow
QuestionJin YongView Question on Stackoverflow
Solution 1 - JavascriptCerebrusView Answer on Stackoverflow
Solution 2 - JavascriptStevenView Answer on Stackoverflow
Solution 3 - JavascriptKooiIncView Answer on Stackoverflow
Solution 4 - JavascriptChristos LytrasView Answer on Stackoverflow
Solution 5 - JavascriptMatt HansonView Answer on Stackoverflow
Solution 6 - Javascriptuser1949536View Answer on Stackoverflow
Solution 7 - JavascriptSerj SaganView Answer on Stackoverflow
Solution 8 - JavascriptNeotropicView Answer on Stackoverflow
Solution 9 - Javascriptuser262419View Answer on Stackoverflow
Solution 10 - JavascriptJohn SlegersView Answer on Stackoverflow
Solution 11 - Javascripthector-j-rivasView Answer on Stackoverflow
Solution 12 - JavascriptkeshinView Answer on Stackoverflow
Solution 13 - JavascriptNeotropicView Answer on Stackoverflow
Solution 14 - JavascriptsanmanView Answer on Stackoverflow
Solution 15 - JavascriptDipesh KCView Answer on Stackoverflow
Solution 16 - JavascriptChrisView Answer on Stackoverflow
Solution 17 - JavascriptSimoneView Answer on Stackoverflow
Solution 18 - JavascriptDave BrownView Answer on Stackoverflow
Solution 19 - JavascriptAnkit AryaView Answer on Stackoverflow
Solution 20 - Javascriptuser1211004View Answer on Stackoverflow
Solution 21 - JavascriptI am LView Answer on Stackoverflow
Solution 22 - JavascriptTommy CunninghamView Answer on Stackoverflow
Solution 23 - JavascriptRakesh JuyalView Answer on Stackoverflow
Solution 24 - JavascriptgrahamView Answer on Stackoverflow
Solution 25 - JavascriptBenaiahView Answer on Stackoverflow