从表中构造和选择数据
我正在开发一个数据采集器,使用 cheerio 从页面中选择元素,但在处理这个动态表格时遇到了困难。
下面是我的运行环境,其中表格的代码已与整页分开。
const fs = require("fs");
const cheerio = require("cheerio");
// Load the saved table markup and dump the inner HTML of every <tbody>
// matched by the "table tbody" selector.
(async () => {
  const markup = fs.readFileSync("table.html", "utf8");
  const $ = cheerio.load(markup);

  // Each matched element's HTML is printed followed by a "*----*" marker.
  $("table tbody").each((_index, tbody) => {
    console.log($(tbody).html(), "*----*");
  });
})();
回答如下:
const fs = require("fs");
const cheerio = require("cheerio");
// Read the saved table markup and print the rows of every <tbody> matched by
// "table tbody", framing each <tbody>'s output with separator lines.
(async () => {
  const page = fs.readFileSync("table.html", "utf8");
  const $ = cheerio.load(page);

  $("table tbody").each((_index, tbody) => {
    console.log("###################################\n");
    extractTR(tbody);
    console.log("___________________________________\n");
  });

  /**
   * Logs the formatted text of every child row of `el`.
   *
   * @param {object} el - Element whose direct child elements are treated as rows.
   */
  function extractTR(el) {
    $(el)
      .children()
      .each((_index, row) => {
        // Count child *elements* rather than raw DOM nodes, so rows holding
        // only whitespace/text nodes are skipped instead of being printed.
        if ($(row).children().length > 0) {
          console.log(extractText(row));
        }
      });
  }

  /**
   * Builds the display text for one row from its first two child elements.
   *
   * Only the first two cells are used; any further cells are ignored.
   * When both of those cells are <td> elements their trimmed texts are joined
   * with a newline, otherwise with a single space. A row with fewer than two
   * cells yields just the text that exists (never the string "undefined").
   *
   * @param {object} el - The row element.
   * @returns {string} The formatted row text; empty if the row has no cells.
   */
  function extractText(el) {
    const cells = $(el).children().toArray().slice(0, 2);
    const texts = cells.map((cell) => $(cell).text().trim());
    const isTdPair =
      cells.length === 2 &&
      cells.every((cell) => $(cell).prop("tagName") === "TD");

    return texts.join(isTdPair ? "\n" : " ");
  }
})();