首页手记【备战春招】第9天破解JavaScript高级玩法...

【备战春招】第9天破解JavaScript高级玩法第十六讲

标签：

JavaScript

课程名称：破解JavaScript高级玩法

课程章节： 绕不过的坎，字符编码

主讲老师： Cloud

课程内容：

今天学习的内容包括：

各类字符编码

课程收获：

16.1 心得：

utf-8转base64 编码规则实现

/**
 * Encodes a string as UTF-8 and returns the bytes as one continuous string of
 * "0"/"1" characters (8 characters per byte, no separators).
 *
 * Windows line endings ("\r\n") are collapsed to "\n" before encoding.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Concatenated 8-bit binary groups of the UTF-8 bytes.
 * @throws {TypeError} If the text contains an unpaired surrogate, which has
 *   no valid UTF-8 representation.
 */
function to_binary(str) {
  const string = str.replace(/\r\n/g, "\n");
  let result = "";
  // for...of walks whole code points, so a surrogate pair is seen once as a
  // single value above 0xFFFF instead of as two separate 16-bit halves.
  for (const char of string) {
    const code = char.codePointAt(0);
    if (code <= 0x007f) {
      // U+0000 ~ U+007F: 1 byte, 0xxxxxxx (needs explicit zero padding).
      result += code.toString(2).padStart(8, "0");
    } else if (code <= 0x07ff) {
      // U+0080 ~ U+07FF: 2 bytes, 110xxxxx 10xxxxxx.
      // Every multi-byte unit has its top bit set, so toString(2) already
      // yields exactly 8 digits.
      result += ((code >> 6) | 0b11000000).toString(2);
      result += ((code & 0b111111) | 0b10000000).toString(2);
    } else if (code <= 0xffff) {
      if (code >= 0xd800 && code <= 0xdfff) {
        throw new TypeError("Lone surrogate cannot be encoded as UTF-8");
      }
      // U+0800 ~ U+FFFF: 3 bytes, 1110xxxx 10xxxxxx 10xxxxxx.
      result += ((code >> 12) | 0b11100000).toString(2);
      result += (((code >> 6) & 0b111111) | 0b10000000).toString(2);
      result += ((code & 0b111111) | 0b10000000).toString(2);
    } else {
      // U+10000 ~ U+10FFFF: 4 bytes, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
      result += ((code >> 18) | 0b11110000).toString(2);
      result += (((code >> 12) & 0b111111) | 0b10000000).toString(2);
      result += (((code >> 6) & 0b111111) | 0b10000000).toString(2);
      result += ((code & 0b111111) | 0b10000000).toString(2);
    }
  }
  return result;
}



// Base64 alphabet: the character at index i stands for the 6-bit value i.
const BASE64_CHARTS =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/**
 * Base64-encodes the bit string that to_binary produces for the given text.
 *
 * The bit string is cut into 6-bit groups (the last one zero-filled on the
 * right), each group is mapped through BASE64_CHARTS, and "=" padding is
 * appended so the output represents whole 24-bit quanta.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Base64 text, including any trailing "=" padding.
 */
function utf8_to_base64(str) {
  const bits = to_binary(str);

  // Number of bytes missing from the final 24-bit quantum.
  const remainder = bits.length % 24;
  const padCount = remainder === 0 ? 0 : (24 - remainder) / 8;

  let encoded = "";
  for (let start = 0; start < bits.length; start += 6) {
    const sextet = bits.slice(start, start + 6).padEnd(6, "0");
    encoded += BASE64_CHARTS[Number(`0b${sextet}`)];
  }

  if (padCount > 1) {
    return `${encoded}==`;
  }
  if (padCount > 0) {
    return `${encoded}=`;
  }
  return encoded;
}


// Demo: print the Base64 form of several sample strings.
// The comment next to each sample is the output given in the original notes.
for (const sample of [
  "a", // YQ==
  "Ȃ", // yII=
  "中国人", // 5Lit5Zu95Lq6
  // Q29kaW5nIFdyaXRpbmcg5aW95paH5Y+s6ZuG5Luk772c5ZCO56uv44CB5aSn5YmN56uv5Y+M6LWb6YGT5oqV56i/77yMMuS4h+WFg+WlluaxoOetieS9oOaMkeaImO+8gQ==
  "Coding Writing 好文召集令｜后端、大前端双赛道投稿，2万元奖池等你挑战！",
]) {
  console.log(utf8_to_base64(sample));
}

抽象出把字符转为 UTF-8 格式二进制串的方法

/**
 * Encodes a string as UTF-8 and returns the bytes as one continuous string of
 * "0"/"1" characters (8 characters per byte, no separators).
 *
 * Windows line endings ("\r\n") are collapsed to "\n" before encoding.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Concatenated 8-bit binary groups of the UTF-8 bytes.
 * @throws {TypeError} If the text contains an unpaired surrogate, which has
 *   no valid UTF-8 representation.
 */
function to_binary(str) {
  const string = str.replace(/\r\n/g, "\n");
  let result = "";
  // for...of walks whole code points, so a surrogate pair is seen once as a
  // single value above 0xFFFF instead of as two separate 16-bit halves.
  for (const char of string) {
    const code = char.codePointAt(0);
    if (code <= 0x007f) {
      // U+0000 ~ U+007F: 1 byte, 0xxxxxxx (needs explicit zero padding).
      result += code.toString(2).padStart(8, "0");
    } else if (code <= 0x07ff) {
      // U+0080 ~ U+07FF: 2 bytes, 110xxxxx 10xxxxxx.
      // Every multi-byte unit has its top bit set, so toString(2) already
      // yields exactly 8 digits.
      result += ((code >> 6) | 0b11000000).toString(2);
      result += ((code & 0b111111) | 0b10000000).toString(2);
    } else if (code <= 0xffff) {
      if (code >= 0xd800 && code <= 0xdfff) {
        throw new TypeError("Lone surrogate cannot be encoded as UTF-8");
      }
      // U+0800 ~ U+FFFF: 3 bytes, 1110xxxx 10xxxxxx 10xxxxxx.
      result += ((code >> 12) | 0b11100000).toString(2);
      result += (((code >> 6) & 0b111111) | 0b10000000).toString(2);
      result += ((code & 0b111111) | 0b10000000).toString(2);
    } else {
      // U+10000 ~ U+10FFFF: 4 bytes, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
      result += ((code >> 18) | 0b11110000).toString(2);
      result += (((code >> 12) & 0b111111) | 0b10000000).toString(2);
      result += (((code >> 6) & 0b111111) | 0b10000000).toString(2);
      result += ((code & 0b111111) | 0b10000000).toString(2);
    }
  }
  return result;
}

// Demo: print the UTF-8 bit string of a 1-byte and a 3-byte character.
// Spaces in the expected values only mark byte boundaries; to_binary itself
// returns the digits without separators.
for (const sample of [
  'a', // 01100001
  '掘', // 11100110 10001110 10011000
]) {
  console.log(to_binary(sample));
}

encodeURI

/**
 * Encodes a string as UTF-8 and returns the bytes as one continuous string of
 * "0"/"1" characters (8 characters per byte, no separators).
 * Example: "人" (three bytes) => 11100100 10111010 10111010, without the spaces.
 *
 * Windows line endings ("\r\n") are collapsed to "\n" before encoding.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Concatenated 8-bit binary groups of the UTF-8 bytes.
 * @throws {TypeError} If the text contains an unpaired surrogate, which has
 *   no valid UTF-8 representation.
 */
function to_binary(str) {
    const string = str.replace(/\r\n/g, "\n");
    let result = "";
    // for...of walks whole code points, so a surrogate pair is seen once as a
    // single value above 0xFFFF instead of as two separate 16-bit halves.
    for (const char of string) {
        const code = char.codePointAt(0);
        if (code <= 0x007F) {
            // U+0000 ~ U+007F: 1 byte, 0xxxxxxx (needs explicit zero padding).
            result += code.toString(2).padStart(8, '0');
        } else if (code <= 0x07FF) {
            // U+0080 ~ U+07FF: 2 bytes, 110xxxxx 10xxxxxx.
            // Every multi-byte unit has its top bit set, so toString(2)
            // already yields exactly 8 digits.
            result += ((code >> 6) | 0b11000000).toString(2);
            result += ((code & 0b111111) | 0b10000000).toString(2);
        } else if (code <= 0xFFFF) {
            if (code >= 0xD800 && code <= 0xDFFF) {
                throw new TypeError("Lone surrogate cannot be encoded as UTF-8");
            }
            // U+0800 ~ U+FFFF: 3 bytes, 1110xxxx 10xxxxxx 10xxxxxx.
            result += ((code >> 12) | 0b11100000).toString(2);
            result += (((code >> 6) & 0b111111) | 0b10000000).toString(2);
            result += ((code & 0b111111) | 0b10000000).toString(2);
        } else {
            // U+10000 ~ U+10FFFF: 4 bytes, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
            result += ((code >> 18) | 0b11110000).toString(2);
            result += (((code >> 12) & 0b111111) | 0b10000000).toString(2);
            result += (((code >> 6) & 0b111111) | 0b10000000).toString(2);
            result += ((code & 0b111111) | 0b10000000).toString(2);
        }
    }
    return result;
}

/**
 * Percent-encodes a string: every UTF-8 byte becomes an upper-case "%XX"
 * escape. Similar in spirit to encodeURI/encodeURIComponent, except that NO
 * character is exempt from escaping (letters, digits and URI punctuation are
 * escaped too).
 *
 * Relies on to_binary (defined elsewhere in this file) to supply the UTF-8
 * bytes as a string of "0"/"1" digits, 8 per byte.
 *
 * Side effect: logs the space-separated byte groups to the console.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Concatenated "%XX" escapes; "" for empty input.
 */
function myEncodeURI(str) {
    // UTF-8 bytes of the input as one long bit string.
    const bites = to_binary(str);
    const byteCount = bites.length / 8;

    // Cut the bit string into 8-digit groups, one per byte,
    // e.g. ['00111111', '11111100'].
    const groups = Array.from({
        length: byteCount
    }, (_, index) => bites.substring(index * 8, index * 8 + 8));
    console.log('byte groups:', groups.join(' '));

    // parseInt(..., 2) replaces eval for reading the binary digits, and
    // padStart keeps bytes below 0x10 two digits wide ("%0A", not "%A").
    // Building each "%XX" separately also makes empty input yield "" rather
    // than a stray "%".
    return groups
        .map((bits) => '%' + parseInt(bits, 2).toString(16).toUpperCase().padStart(2, '0'))
        .join('');
}

// Demo: compare myEncodeURI with the built-in encodeURI on a space and on a
// three-byte CJK character, separating the two reports with an empty line.
const testArr = [' ', '人'];

testArr.forEach((sample, index) => {
    if (index > 0) {
        console.log();
    }
    console.log("str:", sample);
    console.log('codes:'.padEnd(10, ' '), myEncodeURI(sample));
    console.log('encodeURI:', encodeURI(sample));
});

点击查看更多内容

为 TA 点赞

若觉得本文不错，就分享一下吧！

评论

评论

共同学习，写下你的评论

评论加载中...

展开查看更多评论

作者其他优质文章

正在加载中

ToRAmen

学生

手记
篇

粉丝

10

获赞与收藏

5

关注作者，订阅最新文章

阅读免费教程

JavaScript 入门教程

80个小节 315784 2221

后端通用面试教程

41个小节 31416 348

网络编程入门教程

20个小节 12891 242

推荐

评论

收藏

共同学习，写下你的评论



感谢您的支持，我会继续努力的～

扫码打赏，你说多少就多少

赞赏金额会直接到老师账户

支付方式

打开微信扫一扫，即可进行扫码打赏哦

今天注册有机会得

100积分直接送

付费专栏免费学

大额优惠券免费领

立即参与放弃机会

点击
抽奖

慕课手记新用户专享福利

恭喜你，你的运气太好了，居然抽中了 100个积分！

恭喜你，抽中了价值元的专栏！

太棒了，直接落到你账户里！

积分商城里的罗技鼠标、机械键盘、
Kindle 阅读器、小米平衡车
Apple iPad （10.2英寸）、大额优惠券
在等着你去兑换了噢

作者：

免费赠送

兑换码：1111222211 复制

优惠券可用于购买实战课、体系课
无门槛使用

先去看看，有什么好东西马上兑换我爱学习，选课去


热搜

最近搜索清空

【备战春招】第9天 破解JavaScript高级玩法 第十六讲

课程内容：

课程收获：

16.1 心得：

阅读免费教程

【备战春招】第9天破解JavaScript高级玩法第十六讲