首页 > 手记 > 手动实现HTML插件Beautify

手动实现HTML插件Beautify

标签：

Html/CSS JavaScript

学习HTML-Beautify.js之后，我们发现使用JavaScript对HTML进行解析也并不神秘：首先逐字符进行分析，从中提取标记（Token）——在HTML中只存在两种类型的标记，即标签和正文；然后对这些Token进行语法分析，主要是确定每个标记的缩进量。
在有这些基本概念后，今天我们就自己来实现这个小程序：

// 优化过的HTML-Beautify
    function HtmlBeautify(source, indent_value) {
        this.source = source;
        this.indent_value = indent_value;
        this.result = "";

        this.parse();
    }

    // 分析并产生输出到this.result
    HtmlBeautify.prototype.parse = function() {
        var that = this;
        // 当前分析到哪个字符，当前标记值，标记类型，
        // 输出数组，缩进级别，当前格式化内容（去掉多余空格）
        var pos = 0, token_value = "", token_type = "",
        output = [], indent_level = 0, is_format_content = true;

        // 把这些标签作为Single Tag
        var single_token = "br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed".split(',');
        var white_space = "\r\n\t ".split("");

        // 获取下一个标记（首先获取正文，如果正文为空则获取标签）
        function nextToken() {
            var token_value_array = [], val = "", space = false;

            // "&lt;"之前的所有内容作为正文标签
            while ((val = that.source[pos]) !== "&lt;") {
                if (pos &gt;= that.source.length) {
                    token_type = "END";
                    return;
                }

                if (is_format_content) {
                    if ($.inArray(val, white_space) &gt;= 0) {
                        space = true;
                        pos++;
                        continue;
                    }

                    if (space) {
                        token_value_array.push(" ");
                        space = false;
                    }
                }
                token_value_array.push(val);
                pos++;
            }

            token_value = token_value_array.join("").replace(/^\n+|\n$/g,"");
            if ($.trim(token_value) === "") {
                // 如果正文标记为空，则获取标签标记
                if(!is_format_content) {
                    is_format_content = true;
                }
                nextTokenTag();
            } else {
                token_type = "CONTENT";
            }
        }

        // 下一个标签标记
        function nextTokenTag() {
            var token_value_array = [], val = "",
                tagName = "", space = false, is_comment = false;
            
            // 这是一个注释标签
            if(that.source[pos + 1] === "!" && 
                that.source[pos + 2] === "-" &&
                that.source[pos + 3] === "-") {
                is_comment = true;
            }

            // 获取标签标记，直到遇到"&gt;"
            do {
                val = that.source[pos];

                if (!is_comment) {
                    // 如果此字符为空格换行制表符，则跳过此字符
                    if ($.inArray(val, white_space) &gt;= 0) {
                        space = true;
                        pos++;
                        continue;
                    }

                    if (space) {
                        if(token_value_array[token_value_array.length - 1] !== "=" && val !== "=") {
                            token_value_array.push(" ");
                        }
                        space = false;
                    }
                }

                if(val === "/" && that.source[pos + 1] === "&gt;" && token_value_array[token_value_array.length - 1] !== " ") {
                    token_value_array.push(" ");
                }

                token_value_array.push(val);
                pos++;
            } while (val !== "&gt;");

            token_value = $.trim(token_value_array.join(""));
            // 当前标签的名称（小写）
            tagName = getTagName();
            
            if(is_comment) {
                token_type = "SINGLE_TAG";
            } else {
                if (token_value[1] === "/") {
                    // token_value以"&lt;/"开始，则认为是结束标签
                    token_type = "END_TAG";
                } else if ($.inArray(tagName, single_token) &gt;= 0 || token_value[token_value.length - 2] === "/") {
                    // 如果标签在single_token或者token_value以"/&gt;"结尾，则认为是独立标签
                    // 这种判断没有考虑这种情况："&lt;br&gt;&lt;/br&gt;"
                    token_type = "SINGLE_TAG";
                } else {
                    token_type = "START_TAG";
                    if (tagName === "script" || tagName === "style") {
                        is_format_content = false;
                    }
                }
            }
        }

        function getTagName() {
            var tagName = token_value.substr(1, token_value.length - 2);
            var spaceIndex = tagName.indexOf(" ");
            if (spaceIndex &gt; 0) {
                tagName = tagName.substr(0, spaceIndex);
            }
            return tagName.toLowerCase();
        }

        // 输出当前标记
        function outputToken() {
            output.push(token_value);
        }
        // 输出新行
        function outputLine() {
            output.push("\n");
        }
        // 输出缩进
        function outputIndent() {
            for (var i = 0; i &lt; indent_level; i++) {
                output.push(that.indent_value);
            }
        }

        // parse的主体函数，循环获取下一个Token
        while (true) {
            nextToken();

            // 当前Token为结束标记
            if (token_type === "END") {
                break;
            }

            switch (token_type) {
                case "START_TAG":
                    // 我们对缩进的控制非常简单，开始标签后缩进一个单位
                    outputLine();
                    outputIndent();
                    outputToken();
                    indent_level++;
                    break;
                case "END_TAG":
                    // 结束标签前减少一个单位缩进
                    indent_level--;
                    outputLine();
                    outputIndent();
                    outputToken();
                    break;
                case "SINGLE_TAG":
                    outputLine();
                    outputIndent();
                    outputToken();
                    break;
                case "CONTENT":
                    outputLine();
                    if(is_format_content) {
                        outputIndent();
                    }
                    outputToken();
                    break;
            }
        }
        // 去除最前面的"\n"
        this.result = output.join("").substr(1);
    };

    $(function() {
        $("#format").click(function() {

            // 实例化HtmlBeautify，传递需要解析的HTML片段和缩进字符串
            var beautify = new HtmlBeautify($("#content").val(), "    ");
            $("#content").val(beautify.result);

        });
    });

点击查看更多内容

为 TA 点赞

若觉得本文不错，就分享一下吧！

8 评论

评论

共同学习，写下你的评论

12楼
慕UI3141282
先立个flag，1个月学完
0回复举报 2020.03.03
取消
回复
11楼
慕九州0033206
非常不错，感谢分享，点赞！
0回复举报 2018.12.12
取消
回复
8楼
慕斯卡1543131
入门小白，学习学习是怎么样的
1回复举报 2018.12.03
z张_cuichan01 回复慕斯卡1543131
可以试试~
回复举报 2018-12-04
取消
回复
7楼
慕斯卡1543131
入门小白，学习学习是怎么样的
0回复举报 2018.12.03
取消
回复
展开查看剩余评论

展开查看更多评论

作者其他优质文章

正在加载中

慕后森

手记
261篇

粉丝

57

获赞与收藏

236

关注作者，订阅最新文章

阅读免费教程

JavaScript 入门教程

80个小节 317173 2239

后端通用面试教程

41个小节 31551 352

网络编程入门教程

20个小节 12967 244

推荐

评论

收藏

共同学习，写下你的评论



感谢您的支持，我会继续努力的～

扫码打赏，你说多少就多少

赞赏金额会直接到老师账户

支付方式

打开微信扫一扫，即可进行扫码打赏哦

今天注册有机会得

100积分直接送

付费专栏免费学

大额优惠券免费领

立即参与放弃机会

点击
抽奖

慕课手记新用户专享福利

恭喜你，你的运气太好了，居然抽中了 100个积分！

恭喜你，抽中了价值元的专栏！

太棒了，直接落到你账户里！

积分商城里的罗技鼠标、机械键盘、
Kindle 阅读器、小米平衡车
Apple iPad （10.2英寸）、大额优惠券
在等着你去兑换了噢

作者：

免费赠送

兑换码：1111222211 复制

优惠券可用于购买实战课、体系课
无门槛使用

先去看看，有什么好东西马上兑换我爱学习，选课去


热搜

最近搜索清空

手动实现HTML插件Beautify

阅读免费教程