Files
legolog/db/setstealer.js
Ben 3ed429f0f4 fixed basket bug and added brick page
Former-commit-id: d08446b9bf6d9398ed6762b7688ce493509e4731
2022-04-15 17:21:23 +01:00

52 lines
1.6 KiB
JavaScript

// Scrapes BrickLink for every brick, and the quantity of each, in each LEGO set.
const fs = require('fs');
const axios = require('axios');
// Input: res/Sets.txt must exist before running. It holds one set per line with
// tab-separated columns; the set id is read from the third column.
// Lines are split on LF or CRLF and blank lines (e.g. a trailing newline) are
// dropped so they do not turn into phantom sets.
const sets = fs
  .readFileSync('res/Sets.txt', 'utf8')
  .split(/\r?\n/)
  .filter((line) => line.trim() !== '')
  .map((line) => line.split('\t'));
// Output format (written to res/sets.json):
// { setid: { brickid: amount, brickid: amount, ... }, ... }
/**
 * Downloads a page and returns its body as a string.
 *
 * Failures are non-fatal on purpose: the URL is appended to error-set.txt so it
 * can be retried later, a message is logged, and `undefined` is returned.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<string|undefined>} The response body, or `undefined` if the
 *   request (or converting its body to a string) failed.
 */
async function post(url) {
  try {
    // axios.get always sends a GET request regardless of any `method` option,
    // so none is passed; only the User-Agent header is set.
    const res = await axios.get(url, {
      headers: { 'User-Agent': 'Chrome/96.0.4664.175' },
    });
    return res.data.toString();
  } catch (e) {
    fs.appendFileSync('error-set.txt', `${url}\n`);
    // Include the reason so failed downloads can be diagnosed from the log.
    console.log(`Failed to download ${url}: ${e.message}`);
    return undefined;
  }
}
/**
 * Extracts brick ids and their quantities from the HTML of an inventory page.
 *
 * @param {string} html - Page markup to scan.
 * @returns {{ bricks: Object<string, number>, brickCount: number }} `bricks`
 *   maps brick id to quantity (a repeated id keeps the last quantity seen);
 *   `brickCount` is the sum of every quantity matched.
 */
function parseInventory(html) {
  // sometimes fails on minifigures - doesn't matter though, it's correct enough
  // A fresh regex per call keeps the stateful `lastIndex` of the /g flag local.
  const regex = /class=".*?IV_ITEM".*?if \(brickList\["(.*?)"]\).*?nbsp;(.*?) /g;
  const bricks = {};
  let brickCount = 0;
  let m;
  while ((m = regex.exec(html)) !== null) {
    const amount = Number.parseInt(m[2], 10);
    // A non-numeric capture would turn the total into NaN and be written as
    // null in the JSON, so such rows are skipped.
    if (Number.isNaN(amount)) {
      continue;
    }
    brickCount += amount;
    bricks[m[1]] = amount;
  }
  return { bricks, brickCount };
}

/**
 * Scrapes the inventory page of every set in `sets` and writes the combined
 * result to res/sets.json.
 *
 * The set id is taken from the third column of each row; rows without one are
 * skipped. A set whose download failed is recorded with an empty brick map.
 * The JSON file is rewritten after every set, so an interrupted run still
 * leaves the sets scraped so far on disk.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const output = {};
  for (let i = 0; i < sets.length; i++) {
    // Trim to drop a stray '\r' or padding; an absent column becomes ''.
    const setId = (sets[i][2] || '').trim();
    if (setId === '') {
      continue;
    }
    const data = await post(`https://www.bricklink.com/catalogItemInv.asp?S=${encodeURIComponent(setId)}`);
    // post() yields undefined on failure; scan an empty page in that case
    // rather than letting the regex coerce it to the string "undefined".
    const { bricks, brickCount } = parseInventory(typeof data === 'string' ? data : '');
    output[setId] = bricks;
    console.log(`${i}/${sets.length} ${setId} has ${brickCount} bricks`);
    fs.writeFileSync('res/sets.json', JSON.stringify(output));
  }
}
// Run the scraper. Any unexpected failure is reported and the process exits
// non-zero instead of leaving the promise rejection unhandled.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});