Files
legolog/db/setstealer.js
Ben 925af9cc9d databases
Former-commit-id: 5bc8a1b761fe2241c32e51b30b6c0e0dfdeb3461
2022-02-03 18:55:09 +00:00

53 lines
1.7 KiB
JavaScript

// Scrapes BrickLink for every piece, and its quantity, in each LEGO set
const fs = require('fs');
const axios = require('axios');
// Input: create res/Sets.txt with one set per line, columns separated by tabs.
// Each entry of `sets` is the array of columns for one line; the set id used
// for scraping is read from the third column (index 2).
// Lines are split on LF or CRLF and blank lines are dropped, so a trailing
// newline or Windows line endings do not produce a bogus empty row.
const sets = fs
  .readFileSync('res/Sets.txt', 'utf8')
  .split(/\r?\n/)
  .filter((line) => line.trim() !== '')
  .map((line) => line.split('\t'));
// output format:
// setid: { pieceid: amount, pieceid: amount, ... }
/**
 * Downloads a page and returns its body as a string.
 *
 * Despite the name, this issues an HTTP GET: the request goes through
 * `axios.get`, so the `method: 'POST'` option that used to be passed here had
 * no effect and has been removed.
 *
 * Failures are handled on a best-effort basis: the URL is appended to
 * `error-set.txt`, the reason is logged, and `undefined` is returned so the
 * caller can carry on with the next page.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<string|undefined>} The response body, or `undefined` when
 *   the download (or reading its body) failed.
 */
async function post(url) {
  try {
    const res = await axios.get(url, {
      // NOTE(review): presumably the site rejects the default axios
      // User-Agent, hence the browser-like value — confirm.
      headers: { 'User-Agent': 'Chrome/96.0.4664.175' },
    });
    return res.data.toString();
  } catch (e) {
    // Keep a list of URLs that failed in error-set.txt.
    fs.appendFileSync('error-set.txt', `${url}\n`);
    const reason = e instanceof Error ? e.message : String(e);
    console.log(`Failed to download ${url}: ${reason}`);
    return undefined;
  }
}
/**
 * Scrapes the BrickLink inventory page of every set in `sets` and writes the
 * combined result to `res/sets.json`.
 *
 * For each row of `sets` the set id is taken from the third column. Rows
 * without an id (for example a blank trailing line) are skipped instead of
 * being requested as `S=undefined`. Pages are fetched one at a time through
 * `post`; when a download fails (`post` returns `undefined`) the set is still
 * recorded, with an empty inventory.
 *
 * The output object maps set id -> { piece id: amount, ... }.
 *
 * @returns {Promise<void>} Resolves once every set has been processed.
 */
async function main() {
  // sometimes fails on minifigures - doesn't matter though, it's correct enough
  // Capture group 1 is the piece id, group 2 the amount that follows "nbsp;".
  const regex = /class=".*?IV_ITEM".*?if \(brickList\["(.*?)"]\).*?nbsp;(.*?) /g;
  const output = {};
  for (let i = 0; i < sets.length; i++) {
    // Trim so a stray "\r" from CRLF input does not end up in the URL or key.
    const setId = (sets[i][2] || '').trim();
    if (setId === '') {
      continue;
    }
    const data = await post(`https://www.bricklink.com/catalogItemInv.asp?S=${setId}`);
    const inventory = {};
    let pieceCount = 0;
    if (typeof data === 'string') {
      // The regex is global and therefore stateful; start every page at 0.
      regex.lastIndex = 0;
      let m;
      // The pattern always consumes literal text, so a match is never empty
      // and exec() is guaranteed to advance.
      while ((m = regex.exec(data)) !== null) {
        const amount = Number.parseInt(m[2], 10);
        pieceCount += amount;
        inventory[m[1]] = amount;
      }
    }
    output[setId] = inventory;
    console.log(`${i}/${sets.length} ${setId} has ${pieceCount} pieces`);
    // Rewritten after every set — presumably so an interrupted run keeps the
    // results gathered so far.
    fs.writeFileSync('res/sets.json', JSON.stringify(output));
  }
}
// Run the scraper. Report any unexpected failure and exit with a non-zero
// status instead of leaving the promise rejection unhandled.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});