How to format/tidy/beautify in JavaScript

Tags: Javascript, Jquery, Html

Javascript Problem Overview


How can I format/tidy/beautify HTML in JavaScript? I have tried doing a search/replace for angle brackets (<, >) and indenting accordingly. But of course that does not take into account cases where there is JS or CSS etc. inside the HTML.

The reason I want to do this is that I have made a content editor (CMS) which has both WYSIWYG and source code views. The problem is that the code written by the WYSIWYG editor is normally a single line. So I would like a JavaScript function that could format this into a more readable form on demand.

Here is what I have so far:

/**
 * Builds the leading whitespace for a nesting depth, four spaces per level.
 *
 * A fractional level is truncated by `String.prototype.repeat` (the previous
 * countdown loop never reached zero for such values and did not terminate).
 *
 * @param {number} level - Nesting depth; must not be negative.
 * @returns {string} A string of `level * 4` space characters.
 * @throws {RangeError} If `level` is below 0.
 */
function getIndent(level) {
    if (level < 0) {
        // Throw a real Error (not a bare string) so callers get a stack trace.
        throw new RangeError("Level is below 0");
    }
    return ' '.repeat(level * 4);
}

/**
 * Naive HTML indenter: splits the markup on '<', then on '>', and writes each
 * tag (and any text that follows it) on its own line, indented via getIndent().
 *
 * Depth goes up after every tag that does not start with '/', down before
 * every tag that does, and down again after tags whose text starts with
 * "img", "hr" or "br". Script/style contents are not treated specially.
 *
 * @param {string} html - Markup to format; surrounding whitespace is trimmed.
 * @returns {string} The re-indented markup, one fragment per line.
 */
function style_html(html) {
    const fragments = html.trim().split(/</);
    let depth = 0;
    let output = '';

    fragments.forEach(function (fragment, position) {
        const pieces = fragment.split(/>/);

        // Anything that is not exactly "tag>text" is emitted verbatim.
        if (pieces.length !== 2) {
            output += getIndent(depth) + pieces[0] + "\n";
            return;
        }

        const closesTag = fragment[0] === '/';
        if (closesTag) {
            depth -= 1;
        }
        output += getIndent(depth);
        if (!closesTag) {
            depth += 1;
        }

        // The split swallowed the '<'; the very first fragment never had one.
        if (position > 0) {
            output += '<';
        }
        output += pieces[0].trim() + ">\n";

        const trailingText = pieces[1].trim();
        if (trailingText !== '') {
            output += getIndent(depth) + trailingText.replace(/\s+/g, ' ') + "\n";
        }

        // These tags have no closing counterpart, so undo the increment.
        if (pieces[0].match(/^(img|hr|br)/)) {
            depth -= 1;
        }
    });

    return output;
}

Javascript Solutions


Solution 1 - Javascript

I use this method to format HTML. Simple, but does the job:

/**
 * Pretty-prints an HTML string: every tag boundary (`>` followed by `<`,
 * optionally separated by whitespace) becomes a line break, and nesting is
 * shown with one tab per level. Lines are separated by "\r\n".
 *
 * An element that contains inline text and its own closing tag
 * (e.g. `<span>x</span>`) stays on a single line.
 *
 * @param {string} html - Markup to format; surrounding whitespace is ignored.
 * @returns {string} The indented markup.
 */
function format(html) {
    const tab = '\t';
    // Elements that never have a closing tag and so must not open a level.
    const voidTags = new Set([
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr'
    ]);
    let indent = '';
    let result = '';

    // Trim first: leading whitespace would otherwise stop the root tag from
    // being recognised as an opening tag.
    html.trim().split(/>\s*</).forEach(function (element) {
        if (/^\/\w/.test(element)) {
            indent = indent.substring(tab.length);
        }

        // The split removed the brackets between elements, so re-add them.
        result += indent + '<' + element + '>\r\n';

        const name = /^<?(\w[\w-]*)/.exec(element);
        const isVoid = name !== null && voidTags.has(name[1].toLowerCase());

        // Opening tag: starts with a name, holds no inline '>' (which would
        // mean "tag>text</tag"), and does not end in '/' (self-closing).
        // The tail group is optional so one-letter tags (<p>, <a>, <b>) count.
        if (!isVoid && /^<?\w(?:[^>]*[^\/])?$/.test(element)) {
            indent += tab;
        }
    });

    // Drop the extra '<' added before the first element and the extra
    // '>\r\n' added after the last one.
    return result.slice(1, -3);
}

Solution 2 - Javascript

@lovasoa https://stackoverflow.com/questions/3913355/how-to-format-tidy-beautify-in-javascript#27367462 is an excellent solution.
rock-solid, much better than vkBeautify or even CodeMirror (hard to use AMD) and VERY easy

<!-- Loaded over HTTPS so the script is not blocked as mixed content on HTTPS pages. -->
<script src='https://lovasoa.github.io/tidy-html5/tidy.js'></script>
<script>
  // Settings object passed as the second argument to tidy_html5().
  // Declared with `var` (matching the rest of this snippet) so it is no
  // longer created as an implicit global, which throws in strict mode.
  // NOTE(review): the keys look like HTML Tidy configuration names; confirm
  // against the tidy-html5 documentation before changing them.
  var options = {
    "indent": "auto",
    "indent-spaces": 2,
    "wrap": 80,
    "markup": true,
    "output-xml": false,
    "numeric-entities": true,
    "quote-marks": true,
    "quote-nbsp": false,
    "show-body-only": true,
    "quote-ampersand": false,
    "break-before-br": true,
    "uppercase-tags": false,
    "uppercase-attributes": false,
    "drop-font-tags": true,
    "tidy-mark": false
  };

  // Tidy the current page's <body> markup and print the result.
  var html = document.querySelector("body").outerHTML;
  var result = tidy_html5(html, options);
  console.log(result);
</script>

Solution 3 - Javascript

I needed something similar and here is my solution, inspired by method provided by michal.jakubeczy. It is slightly complicated in order to preserve formatting within <pre> tags. Hope this will help someone.

/**
 * Indents an HTML string with one tab per nesting level while leaving the
 * content of `<pre>…</pre>` blocks untouched.
 *
 * How it works:
 *  1. Every `<pre>…</pre>` block is swapped for a `<--TEMPPREn/-->` placeholder.
 *  2. Each remaining tag (plus at most one following text character) is
 *     prefixed with a newline and the current indentation.
 *  3. The placeholders are swapped back. A newline is inserted after `<pre>`
 *     and the block's indentation before `</pre>`; unformatHTML() strips both.
 *
 * Only attribute-less `<pre>` tags are protected.
 *
 * @param {string} html - Markup to format.
 * @returns {string} The indented markup, without a leading newline.
 */
function formatHTML(html) {
    const tab = '\t';
    const voidTags = ['area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr'];
    const preBlocks = [];
    let indent = '\n';

    const formatted = html
        // Global + lazy so each <pre> block is captured separately; [\s\S]
        // avoids the overlapping alternation that could backtrack badly.
        .replace(/<pre>[\s\S]*?<\/pre>/g, function (block) {
            preBlocks.push({ indent: '', tag: block });
            return '<--TEMPPRE' + (preBlocks.length - 1) + '/-->';
        })
        .replace(/<[^<>]+>[^<]?/g, function (x) {
            const nameMatch = /<\/?([^\s/>]+)/.exec(x);
            if (nameMatch === null) {
                // Something like "< b >" that is not really a tag: leave as is.
                return x;
            }
            const tag = nameMatch[1].toLowerCase();
            const placeholder = /<--TEMPPRE(\d+)\/-->/.exec(x);

            // The match may include one character of text after the tag.
            const hasTrailingChar = x.charAt(x.length - 1) !== '>';
            const markup = hasTrailingChar ? x.slice(0, -1) : x;
            const trailing = hasTrailingChar ? x.slice(-1) : '';

            if (placeholder) {
                const block = preBlocks[Number(placeholder[1])];
                if (block) {
                    // Remember where the block sits so </pre> can be aligned.
                    block.indent = indent;
                }
            }

            // Void elements, doctype/comments/processing instructions and
            // explicitly self-closed tags never open a nesting level.
            const standsAlone = voidTags.indexOf(tag) >= 0 ||
                tag.charAt(0) === '!' ||
                tag.charAt(0) === '?' ||
                markup.slice(-2) === '/>';
            if (standsAlone) {
                return indent + x;
            }

            if (x.indexOf('</') < 0) { // open tag
                const line = hasTrailingChar
                    ? indent + markup + indent + tab + trailing
                    : indent + x;
                if (!placeholder) {
                    indent += tab;
                }
                return line;
            }

            // close tag
            indent = indent.slice(0, -tab.length);
            return hasTrailingChar
                ? indent + markup + indent + trailing
                : indent + x;
        });

    // A replacer function is used so "$&", "$1" etc. inside <pre> content are
    // inserted literally rather than treated as replacement patterns.
    const restored = formatted.replace(/<--TEMPPRE(\d+)\/-->/g, function (match, index) {
        const block = preBlocks[Number(index)];
        if (!block) {
            return match;
        }
        // '<pre>' is 5 characters long and '</pre>' is 6.
        return '<pre>\n' + block.tag.slice(5, -6) + block.indent + '</pre>';
    });

    return restored.charAt(0) === '\n' ? restored.slice(1) : restored;
}

/**
 * Collapses indented HTML back onto one line by removing every newline and
 * tab outside `<pre>…</pre>` blocks.
 *
 * Inside each `<pre>` block only two things are removed: a newline directly
 * after `<pre>` and a newline plus tabs directly before `</pre>`. The rest of
 * the block's content is kept byte-for-byte.
 *
 * @param {string} html - Indented markup.
 * @returns {string} The markup without indentation whitespace.
 */
function unformatHTML(html) {
    const preBlocks = [];

    const collapsed = html
        // Global + lazy so every <pre> block is protected individually.
        .replace(/<pre>[\s\S]*?<\/pre>/g, function (block) {
            preBlocks.push(block);
            return '<--TEMPPRE' + (preBlocks.length - 1) + '/-->';
        })
        .replace(/[\n\t]/g, '');

    // A replacer function keeps "$&", "$1" etc. in <pre> content literal.
    return collapsed.replace(/<--TEMPPRE(\d+)\/-->/g, function (match, index) {
        const block = preBlocks[Number(index)];
        if (block === undefined) {
            return match;
        }
        return block
            .replace(/^<pre>\n/, '<pre>')
            .replace(/\n\t*<\/pre>$/, '</pre>');
    });
}

Solution 4 - Javascript

I find js-beautify far superior to any solution posted so far.

Add the script to your lib folder:

Bring inside header as usual:

<script src="libs/beautify.js"></script>

Target code wherever it is on your page (e.g. pre or code tag) and use the js_beautify function to format as needed:

// Beautify each matched element's own text. The function form of .text() runs
// once per element; the getter form would concatenate the text of all matches
// and write that same blob into every element.
$(".my_class").text(function (index, currentText) {
    return js_beautify(currentText);
});

This will format as needed. All kinds of config options available on the repo.

Solution 5 - Javascript

You can also use a command line tool if you have node.js installed.

run npm install -g uglify-js to install uglifyjs globally, check here for documentation.

Then you can run uglifyjs index.min.js -b -o index.js (the uglify-js package installs its command as uglifyjs).

Solution 6 - Javascript

jQuery creator John Resig wrote a fast and lightweight HTML parser in javascript. If you're looking for a solution which you can add directly to your CMS then you could write a simple beautifier using this parser as a base. All you'd need to do is reoutput the elements adding spaces and line breaks as you like, using the built in api:

// Run the parser over htmlString with a set of (currently empty) callbacks.
// NOTE(review): the callback roles below are inferred from their names and
// parameters — confirm against the parser's own documentation.
HTMLParser(htmlString, {
  // presumably called for each opening tag; `unary` likely marks tags without a closing counterpart
  start: function(tag, attrs, unary) {},
  // presumably called for each closing tag
  end: function(tag) {},
  // presumably called for text between tags
  chars: function(text) {},
  // presumably called for HTML comments
  comment: function(text) {}
});

An added benefit of this approach is that you could use the same HTMLParser to read HTML back into your WYSIWYG, or otherwise interact with your user's HTML tree. HTMLParser also comes prebuilt with an HTMLtoDOM method.

Solution 7 - Javascript

Writing the HTML on one line would download faster to the browser, so I am not sure I would want it formatted. Maybe offer an option to choose between a formatted version and an optimized version.

As for the question... you could do an AJAX call after so many actions and send the code to the server to be formatted and shown in a different box on the screen. Basically it would be a real-time version of this site: http://infohound.net/tidy/

Solution 8 - Javascript

I believe that both chrome and firebug's debugging code display engines are written in JS. That's probably heavier duty than you really want to be messing with though.

Solution 9 - Javascript

Resig's formatter fails with a very simple test case:

at http://ejohn.org/apps/htmlparser/

in the input box enter:

<script src="/files/htmlparser.js"></script>
<script>
var x = 1;
</script>

output box renders:

<script src="/files/htmlparser.js"></script>
<script></script>
var x = 1;

Attributions

All content for this solution is sourced from the original question on Stackoverflow.

The content on this page is licensed under the Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.

Content Type | Original Author | Original Content on Stackoverflow
Question | Petah | View Question on Stackoverflow
Solution 1 - Javascript | michal.jakubeczy | View Answer on Stackoverflow
Solution 2 - Javascript | rickdog | View Answer on Stackoverflow
Solution 3 - Javascript | Gabriel | View Answer on Stackoverflow
Solution 4 - Javascript | Cybernetic | View Answer on Stackoverflow
Solution 5 - Javascript | nickleefly | View Answer on Stackoverflow
Solution 6 - Javascript | Daniel Mendel | View Answer on Stackoverflow
Solution 7 - Javascript | Eric | View Answer on Stackoverflow
Solution 8 - Javascript | Paul McMillan | View Answer on Stackoverflow
Solution 9 - Javascript | rickdog | View Answer on Stackoverflow