我在脚本标签内抓取数据时遇到了一些问题。我想获取 jsonSwatchConfig [ ] 中的数据,特别是每个 'option_id' 和 'us' 属性。var jar = require('request').jar();var request = require('request').defaults({ jar: jar });var cheerio = require("cheerio");var cloudscraper = require('cloudscraper');var fs = require('fs');const logT = require('log-timestamp');var open = require('open');var today = new Date();var randomsize;var webdriver = require('selenium-webdriver');var request = require("request")var cheerio = require("cheerio")var sizes =[]function product() { console.log("Obtaining Product Page..") request.get({ url: 'https://www.sivasdescalzo.com/en/nike-air-max-270-react-eng-cd0113-400', jar: request.jar(), headers: { 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7,pt;q=0.6,fr;q=0.5,de;q=0.4', 'cache-control': 'max-age=0', 'dnt': '1', 'referer': 'https://www.sivasdescalzo.com/en/lifestyle', 'sec-fetch-dest': 'document', 'sec-fetch-mode': 'navigate', 'sec-fetch-site': 'same-origin', 'sec-fetch-user': '?1', 'upgrade-insecure-requests': '1', 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'}, cloudflareTimeout: 5000, cloudflareMaxTimeout: 30000, followAllRedirects: true, //proxy: getproxy(), challengesToSolve: 3, json: true, decodeEmails: false, gzip: true, }, 我用拆分(split)和删除字符的办法尝试了很多次,但始终没能取到我想要的数据。
1 回答
慕桂英546537
TA贡献1848条经验 获得超10个赞
使用 RegExp(正则表达式)怎么样?
// Locate the array literal that follows the "jsonSwatchConfig" key inside the
// script text and keep it as a raw string (it is NOT parsed here).
// Note: `.` does not match line terminators, so this pattern only matches when
// the key and the whole array sit on one line of `scriptText`.
var pattern = /"jsonSwatchConfig": (\[.*\])/;
const match = scriptText.match(pattern);
// String.prototype.match returns null when nothing matches; fail with a clear
// message instead of an opaque "cannot read properties of null" TypeError.
if (match === null) {
  throw new Error('"jsonSwatchConfig" array not found in script text');
}
// Capture group 1 holds the bracketed array text.
var results = match[1];
编辑:要提取 option_id 和 us 属性,您首先必须把结果从字符串转换为对象。上面的代码得到的 jsonSwatchConfig 数组只是一个字符串,而不是实际的对象数组,所以您可以接着上面的代码执行 results = JSON.parse(results),这样 results 就变成了一个对象数组。要访问特定属性,您可以执行以下操作:
// Walk every entry of `results` and log its `option_id` and `us` values
// as an object of the form { x, y }.
results.forEach(({ option_id: x, us: y }) => console.log({ x, y }));
添加回答
举报
0/150
提交
取消