diff --git a/docs/API.md b/docs/API.md index ea69a55..fde99e6 100644 --- a/docs/API.md +++ b/docs/API.md @@ -20,14 +20,10 @@ automatically every request | POST | /api/bulk/brick | array | no | POST due to bulk nature | | GET | /api/set/:id | | no | | | GET | /api/cdn/:id | | no | | +| GET | /api/basket/price/ | | no | | | PUT | /api/auth/login/ | | yes | | | POST | /api/auth/signup/ | | yes | | | GET | /api/auth/orders/ | | yes | | -| GET | /api/auth/basket/ | | yes | | -| PUT | /api/auth/basket/:id | quantity | yes | | -| POST | /api/auth/basket/:id | | yes | manipulate basket content | -| DEL | /api/auth/basket/:id | quantity | yes | if no id, delete whole | -| DEL | /api/auth/basket/ | | yes | if no id, delete whole | Query endpoints do not return the full data on a brick/set, they return a subset for product listing pages @@ -52,78 +48,6 @@ brick: brick to search for (absolute type, fuzzy string) set: brick to search for (absolute, fuzzy string) -### /api/special/ - -GET /api/special/ - -Response Object -```json -{ - "data": { - "title": "Special 1", - "end": "2020-01-31T00:00:00.000Z", - } -} -``` - -### /api/type/:id - -GET /api/type/:id - -Response Object -```json -{ - "data": { - "type": "brick", // or set - } -} -``` - -### /api/search/ - -GET /api/search?params - -### /api/bricks/ - -GET - -Response Object -```json -{ - -} -``` - -### /api/sets/ -### /api/brick/:id/ - - - -### /api/set/:id/ - - - -### /api/cdn/:id/ -### /api/auth/login/ -### /api/auth/signup/ - -Request Body -```json -{ - -} -``` - -Response Object -```json -{ - -} -``` - -### /api/auth/orders/ -### /api/auth/basket/ - ## Response Structure ```js diff --git a/docs/QUERY.md b/docs/QUERY.md new file mode 100644 index 0000000..f3963e6 --- /dev/null +++ b/docs/QUERY.md @@ -0,0 +1,24 @@ +# Query + +In order to quickly and accurately query my database for hand-written +searches, I need a way to perform a "fuzzy" search on the data at +hand. 
+ +I had previously created a simple algorithm to search a database with +a basic POSIX regex filter and then rank them on the distance to the +input query, which was ordered based on Levenshtein Distance. + +Keeping to the theme of querying a large, static data set for +multiple known attributes (name, tags, id), I decided to stick with +the distance function for result relevancy but lean into automatic +spell correction with an n-gram model, proposing to the controller, +"the most probable few alternate queries" that the user might have +meant, which can then be queried in the database to go through the +same relevancy distance sorting. + +## What I didn't have time for + +My end goal was to create a gradient boosted decision tree which would +correct spelling based on a trained data set of word frequency, n-gram +modeling, distance between words and also a static dictionary. However +this proved out of scope. diff --git a/src/controllers/bigram.js b/src/controllers/bigram.js deleted file mode 100644 index 99a8091..0000000 --- a/src/controllers/bigram.js +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/src/controllers/n-grams.js b/src/controllers/n-grams.js new file mode 100644 index 0000000..5ed7d37 --- /dev/null +++ b/src/controllers/n-grams.js @@ -0,0 +1,43 @@ +const axios = require("axios"); + +let StaticDictionary = []; + +async function Init() { + await axios.get('http://www.mieliestronk.com/corncob_lowercase.txt').then(response => { + StaticDictionary = response.data; + }); +} + +function MostProbableAlternateQueries(query) { + const words = query.split(' '); + + const reconstruction = []; + + for (let i = 0; i < words.length; i++) { + const mostLikely = MostProbableMissSpelling(words[i]); + reconstruction.push([...mostLikely]); + } + + console.log(reconstruction) + + // work out a bit of context to determine the most likely sentence +} + +function MostProbableMissSpelling(word) { + return []; +} + +function TriGrams(word) { + +} + +function BiGrams(word) { 
+ +} + + +module.exports = { + Init, + MostProbableAlternateQueries, + MostProbableMissSpelling, +}; diff --git a/src/index.js b/src/index.js index 9402f1c..1f5983f 100644 --- a/src/index.js +++ b/src/index.js @@ -5,6 +5,8 @@ const API = require('./routes/api.js'); const Database = require('./database/database.js'); +const ngrams = require('./controllers/n-grams.js'); + async function main() { Config.Load(); @@ -21,6 +23,12 @@ async function main() { Server.Listen(process.env.PORT); API.Init(); + + await ngrams.Init(); + + ngrams.MostProbableAlternateQueries('brick 2x10x4'); + ngrams.MostProbableAlternateQueries('lego star wars battlefront'); + ngrams.MostProbableAlternateQueries('lego stor was s'); } main();