Extract the current DOM and print it as a string, with styles intact

Tags: Javascript, CSS, HTML, WebKit

Javascript Problem Overview


I'd like to be able to take my DOM, as is, and convert it to a string. Let's say I open up the inspector and make a change to the margin-left property of a particular element. That change should be reflected in my string.

The function should properly take all the styles currently applied to an element (excluding default values) and include them in the inline style for that element.

I have written a 'solution' which has proven to be inadequate. The getMatchedCSSRules function in webkit is extremely finicky, and I haven't been able to determine why it sometimes works and sometimes doesn't. Therefore, I would like to avoid using this function unless it works 100% of the time. Similarly, the getComputedStyle function has problems of its own. If I use the inspector to change the #footer element on this page to be 7px solid red rather than 7px solid black, the change will be reflected when I run getComputedStyle(document.getElementById('footer')).cssText in the console, but it will also give me a host of inherited properties that were never modified by either the user using the inspector or by the stylesheets on the page.

I am looking for a solution that works with webkit--cross browser compatibility is not an issue at the moment.

Thank you!

Javascript Solutions


Solution 1 - Javascript

I think this could be a solution (it took me nearly a whole day!).

It returns a string representing the DOM of any element, with all external styles included in the "style" attributes except default values, and does not permanently modify that element.

For example: console.log(document.body.serializeWithStyles());

You can load this code in Web Inspector command line or from a script tag in the body element but NOT in the head element because it requires the existence of document.body.

I have tested it on desktop Safari 5 (I don't have the mobile version).

It works like this:

For each element in the DOM:

  1. caching the value of style.cssText property, which represents the inline style, in an array;
  2. calling getComputedStyle on the element;
  3. checking if we have the css default values lookup table corresponding to this element's tag name;
  4. building it if not;
  5. iterating through the result, finding which values are non default using the lookup table;
  6. applying those non default style values to the element.
Once every element has been processed:

  7. storing the outerHTML as the result;
  8. for each element, restoring the inline styles from the cache;
  9. returning the previously stored result.

The code:

Element.prototype.serializeWithStyles = (function () {



// Cache of default computed styles, keyed by upper-case tag name.
// Each entry maps a CSS property name to the value a bare element of that
// tag reports, which lets the serializer leave default values out of the result.
var defaultStylesByTagName = {};

// Elements with these tag names are skipped entirely: no styles are inlined
// on them and no default-style table is precomputed for them.
// Note: the element is spelled NOFRAMES (plural); "NOFRAME" never matches a
// real tagName, so the original entry had no effect.
var noStyleTags = {
    "BASE": true,
    "HEAD": true,
    "HTML": true,
    "META": true,
    "NOFRAMES": true,
    "NOSCRIPT": true,
    "PARAM": true,
    "SCRIPT": true,
    "STYLE": true,
    "TITLE": true
};

// This list determines which css default values lookup tables are precomputed at load time.
// Lookup tables for other tag names will be automatically built at runtime if needed.
var tagNames = ["A","ABBR","ADDRESS","AREA","ARTICLE","ASIDE","AUDIO","B","BASE","BDI","BDO","BLOCKQUOTE","BODY","BR","BUTTON","CANVAS","CAPTION","CENTER","CITE","CODE","COL","COLGROUP","COMMAND","DATALIST","DD","DEL","DETAILS","DFN","DIV","DL","DT","EM","EMBED","FIELDSET","FIGCAPTION","FIGURE","FONT","FOOTER","FORM","H1","H2","H3","H4","H5","H6","HEAD","HEADER","HGROUP","HR","HTML","I","IFRAME","IMG","INPUT","INS","KBD","KEYGEN","LABEL","LEGEND","LI","LINK","MAP","MARK","MATH","MENU","META","METER","NAV","NOBR","NOSCRIPT","OBJECT","OL","OPTION","OPTGROUP","OUTPUT","P","PARAM","PRE","PROGRESS","Q","RP","RT","RUBY","S","SAMP","SCRIPT","SECTION","SELECT","SMALL","SOURCE","SPAN","STRONG","STYLE","SUB","SUMMARY","SUP","SVG","TABLE","TBODY","TD","TEXTAREA","TFOOT","TH","THEAD","TIME","TITLE","TR","TRACK","U","UL","VAR","VIDEO","WBR"];

// Precompute the lookup tables for every listed tag that is not skipped.
for (var i = 0; i < tagNames.length; i++) {
    if (!noStyleTags[tagNames[i]]) {
        defaultStylesByTagName[tagNames[i]] = computeDefaultStyleByTagName(tagNames[i]);
    }
}

/**
 * Builds the default-style lookup table for one tag name.
 *
 * A fresh element of the given tag is temporarily appended to document.body,
 * its computed style is copied into a plain object, and the element is
 * removed again. The values must be read while the element is still attached.
 *
 * @param {string} tagName Tag name passed to document.createElement.
 * @returns {Object} Map from CSS property name to its computed value on the probe element.
 */
function computeDefaultStyleByTagName(tagName) {
    var defaultStyle = {};
    var element = document.body.appendChild(document.createElement(tagName));
    try {
        var computedStyle = getComputedStyle(element);
        for (var i = 0; i < computedStyle.length; i++) {
            var propName = computedStyle[i];
            // getPropertyValue is the standard accessor for dashed property
            // names; bracket access by dashed name is not available everywhere.
            defaultStyle[propName] = computedStyle.getPropertyValue(propName);
        }
    } finally {
        // Always detach the probe element, even if reading its styles failed.
        document.body.removeChild(element);
    }
    return defaultStyle;
}

/**
 * Returns the default-style lookup table for a tag name, building and
 * caching it on first use.
 *
 * @param {string} tagName Tag name in any letter case; it is upper-cased before lookup.
 * @returns {Object} The cached (or freshly computed) table for that tag.
 */
function getDefaultStyleByTagName(tagName) {
    var key = tagName.toUpperCase();
    var known = defaultStylesByTagName[key];
    if (known) {
        return known;
    }
    // Not cached yet: compute once and remember it under the upper-case key.
    defaultStylesByTagName[key] = computeDefaultStyleByTagName(key);
    return defaultStylesByTagName[key];
}

return function serializeWithStyles() {
    if (this.nodeType !== Node.ELEMENT_NODE) { throw new TypeError(); }
    var cssTexts = [];
    var elements = this.querySelectorAll("*");
    for ( var i = 0; i < elements.length; i++ ) {
        var e = elements[i];
        if (!noStyleTags[e.tagName]) {
            var computedStyle = getComputedStyle(e);
            var defaultStyle = getDefaultStyleByTagName(e.tagName);
            cssTexts[i] = e.style.cssText;
            for (var ii = 0; ii < computedStyle.length; ii++) {
                var cssPropName = computedStyle[ii];
                if (computedStyle[cssPropName] !== defaultStyle[cssPropName]) {
                    e.style[cssPropName] = computedStyle[cssPropName];
                }
            }
        }
    }
    var result = this.outerHTML;
    for ( var i = 0; i < elements.length; i++ ) {
        elements[i].style.cssText = cssTexts[i];
    }
    return result;
}




})();


Solution 2 - Javascript

Can't you just do document.getElementsByTagName('body')[0].innerHTML? When I make changes in the inspector and then enter the above javascript in the console, it returns the updated HTML.

EDIT: I just tried putting that script in a function and attaching it to an onclick event. Made some updates in the inspector, clicked button, and it worked:

HTML

<button onclick="printDOM()">Print DOM</button>

Javascript

/**
 * Logs the current inner HTML of the document's first <body> element to the console.
 */
function printDOM() {
    var bodyElements = document.getElementsByTagName('body');
    var markup = bodyElements[0].innerHTML;
    console.log(markup);
}

Solution 3 - Javascript

In case you want to capture the whole page, it is easier to just get all non-inline stylesheets and inline them.

The approach in the accepted answer is magnificent, but quite slow and touches the whole document.

I took the following approach to capture a page including style:

  1. document.documentElement.outerHTML;

  2. get all stylesheets from the document.styleSheets API

Along the lines of:

/**
 * Collects the CSS of every enabled stylesheet that was loaded from a URL
 * and returns it as a string of <style> blocks.
 *
 * Sheets without an href (inline <style> elements) are skipped.
 * When a sheet's rules cannot be read, typically because of the cross-origin
 * policy, its source is downloaded from its href instead.
 *
 * @returns {string} Concatenated "<style>...</style>" blocks, or "" if nothing was captured.
 */
function captureCss(){
	// Returns the sheet's rule list, or null when the rules are not readable.
	// Reading cssRules of a cross-origin sheet throws, so the error is
	// deliberately turned into null to trigger the download fallback.
	function readRules(sheet){
		try{
			return sheet.cssRules || sheet.rules || null;
		}catch(e){
			return null;
		}
	}

	// Downloads a stylesheet synchronously so captureCss can keep returning a
	// plain string. Returns "" for non-2xx responses.
	function fetchCssText(url){
		var xhr = new XMLHttpRequest();
		xhr.open("GET", url, false);
		xhr.send(null);
		return (xhr.status >= 200 && xhr.status < 300) ? xhr.responseText : "";
	}

	var cssrules = "";
	var sheets = document.styleSheets;
	for(var i = 0; i<sheets.length; i++){
		var sheet = sheets[i];
		if(sheet.disabled || sheet.href == null){
			continue;
		}
		var rules = readRules(sheet);
		if(rules == null){ // can be null because of cross origin policy
			try{
				var fetched = fetchCssText(sheet.href); // usually served from the browser cache
				if(fetched){
					cssrules += "<style>\n"+fetched+"\n</style>\n";
				}
			}catch(e){
				// Best effort: a sheet that cannot be downloaded is reported and skipped.
				console.log(e);
			}
			continue;
		}
		for(var j = 0; j<rules.length; j++){
			cssrules += "<style>\n"+rules[j].cssText+"\n</style>\n";
		}
	}
	return cssrules;
}

  3. Insert the captured cssrules at the very start of the <head> element in the outerHTML text.

This way you get a self contained styled page.

This is obviously less applicable for partial content.

Solution 4 - Javascript

Based on Luc125's answer, I've created a developer tools extension for Chrome that incorporates that code for capturing styles and markup for a page fragment. The extension is in the [Chrome Web Store][1] and is on [Github][2]. The "Computed Styles" output option uses that method.

![Extension Screenshot][3]

[1]: https://chrome.google.com/webstore/detail/css%20html/pbgafccggboemhmcmnmglkgidbiigoeh?hl=en "Chrome Web Store"
[2]: https://github.com/ifugu/CSS_Plus_HTML/ "Github"
[3]: http://i.stack.imgur.com/Ck0w3.png

Solution 5 - Javascript

Maybe the Google Closure Library has a solution for you.

There's code that seems to do what you need, i.e., to compute the CSS rules to reproduce the same appearance of an element outside of its current position in the dom (in their case they need that to transfer styles into an iframe to use it as a seamless inline editor).

Quoting from the source file style.js:

/**
 * @fileoverview Provides utility routines for copying modified
 * `CSSRule` objects from the parent document into iframes so that any
 * content in the iframe will be styled as if it was inline in the parent
 * document.
 *
 * <p>
 * For example, you might have this CSS rule:
 *
 * #content .highlighted { background-color: yellow; }
 *
 * And this DOM structure:
 *
 * <div id="content">
 *   <iframe />
 * </div>
 *
 * Then inside the iframe you have:
 *
 * <body>
 * <div class="highlighted">
 * </body>
 *
 * If you copied the CSS rule directly into the iframe, it wouldn't match the
 * .highlighted div. So we rewrite the original stylesheets based on the
 * context where the iframe is going to be inserted. In this case the CSS
 * selector would be rewritten to:
 *
 * body .highlighted { background-color: yellow; }
 * </p>
 */

Solution 6 - Javascript

OK, maybe I'm missing something here, but isn't the string you want just document.documentElement.innerHTML? A quick test w/ Chrome verifies that it picks up the changes made in the Developer Tools to style attributes as you describe. Assigned class names aren't expanded (e.g., you'll have no idea what class="superfuntime" is doing), but if I'm reading your question correctly, you haven't stated a need for that.

Solution 7 - Javascript

Internet Explorer -> Developer Tools -> DOM Explorer

Select element and right click -> "Copy element with styles".

Solution 8 - Javascript

Chrome feature - Printing the DOM:
The --dump-dom flag prints document.body.innerHTML to stdout:

chrome --headless --disable-gpu --dump-dom https://www.chromestatus.com/

Read more

Attributions

All content for this solution is sourced from the original question on Stackoverflow.

The content on this page is licensed under the Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.

| Content Type | Original Author | Original Content on Stackoverflow |
| --- | --- | --- |
| Question | D-Nice | View Question on Stackoverflow |
| Solution 1 - Javascript | Luc125 | View Answer on Stackoverflow |
| Solution 2 - Javascript | squidbe | View Answer on Stackoverflow |
| Solution 3 - Javascript | sleeplessnerd | View Answer on Stackoverflow |
| Solution 4 - Javascript | ifugu | View Answer on Stackoverflow |
| Solution 5 - Javascript | alienhard | View Answer on Stackoverflow |
| Solution 6 - Javascript | JURU | View Answer on Stackoverflow |
| Solution 7 - Javascript | ion | View Answer on Stackoverflow |
| Solution 8 - Javascript | saulsluz | View Answer on Stackoverflow |