bigrams shmigrams

Former-commit-id: 7947042da92a0c541faa499020d0ff72fd054bf2
This commit is contained in:
Ben
2022-04-25 01:21:15 +01:00
parent 4ce954cd4d
commit 9ff654d973
5 changed files with 76 additions and 79 deletions

View File

@@ -20,14 +20,10 @@ automatically every request
| POST | /api/bulk/brick | array | no | POST due to bulk nature |
| GET | /api/set/:id | | no | |
| GET | /api/cdn/:id | | no | |
| GET | /api/basket/price/ | | no | |
| PUT | /api/auth/login/ | | yes | |
| POST | /api/auth/signup/ | | yes | |
| GET | /api/auth/orders/ | | yes | |
| GET | /api/auth/basket/ | | yes | |
| PUT | /api/auth/basket/:id | quantity | yes | |
| POST | /api/auth/basket/:id | | yes | manipulate basket content |
| DEL | /api/auth/basket/:id | quantity | yes | if no id, delete whole |
| DEL | /api/auth/basket/ | | yes | if no id, delete whole |
Query endpoints do not return the full data on a brick/set, they return
a subset for product listing pages
@@ -52,78 +48,6 @@ brick: brick to search for (absolute type, fuzzy string)
set: set to search for (absolute, fuzzy string)
### /api/special/
GET /api/special/
Response Object
```json
{
"data": {
"title": "Special 1",
"end": "2020-01-31T00:00:00.000Z",
}
}
```
### /api/type/:id
GET /api/type/:id
Response Object
```json
{
"data": {
"type": "brick", // or set
}
}
```
### /api/search/
GET /api/search?params
### /api/bricks/
GET
Response Object
```json
{
}
```
### /api/sets/
### /api/brick/:id/
### /api/set/:id/
### /api/cdn/:id/
### /api/auth/login/
### /api/auth/signup/
Request Body
```json
{
}
```
Response Object
```json
{
}
```
### /api/auth/orders/
### /api/auth/basket/
## Response Structure
```js

24
docs/QUERY.md Normal file
View File

@@ -0,0 +1,24 @@
# Query
In order to quickly and accurately query my database for hand-written
searches, I need a way to perform a "fuzzy" search on the data at
hand.
I had previously created a simple algorithm to search a database with
a basic posix regex filter and then rank them on the distance to the
input query, which was ordered based on Levenshtein Distance.
Keeping to the theme of querying a large, static data set for
multiple known attributes (name, tags, id), I decided to stick with
the distance function for result relevancy but lean into automatic
spell correction with an n-gram model, proposing to the controller
"the most probable few alternate queries" that the user might have
meant, which can then be queried in the database to go through the
same relevancy distance sorting.
## What I didn't have time for
My end goal was to create a gradient boosted decision tree which would
correct spelling based on a trained data set of word frequency, n-gram
modeling, distance between words and also a static dictionary. However
this proved out of scope.

View File

@@ -1,2 +0,0 @@

View File

@@ -0,0 +1,43 @@
const axios = require("axios");
let StaticDictionary = [];
/**
 * Download the static dictionary used for spell correction.
 *
 * The corncob corpus is served as one newline-separated plain-text word
 * list, so the raw response body is split into an array of words here —
 * `StaticDictionary` is declared as an array, and assigning the raw
 * string (as before) would have left it holding a single giant string.
 *
 * @returns {Promise<void>} resolves once StaticDictionary is populated.
 * @throws network errors propagate to the caller (no retry/fallback).
 */
async function Init() {
  const response = await axios.get('http://www.mieliestronk.com/corncob_lowercase.txt');
  // Tolerate CRLF line endings and drop any trailing blank entry.
  StaticDictionary = response.data
    .split(/\r?\n/)
    .filter((word) => word.length > 0);
}
/**
 * Propose likely alternate spellings for each word of a search query.
 *
 * Splits the query on single spaces and collects, per word, the
 * candidate corrections suggested by MostProbableMissSpelling.
 * Currently only logs the per-word candidates; sentence-level ranking
 * is not implemented yet.
 *
 * @param {string} query - raw user search query.
 */
function MostProbableAlternateQueries(query) {
  const reconstruction = query
    .split(' ')
    .map((word) => [...MostProbableMissSpelling(word)]);
  console.log(reconstruction)
  // work out a bit of context to determine the most likely sentence
}
/**
 * Rank the most probable intended spellings of a single word.
 *
 * Stub: candidate generation is not implemented yet, so the list of
 * suggestions is always empty.
 *
 * @param {string} word - the (possibly misspelled) input word.
 * @returns {string[]} candidate corrections, best first (currently none).
 */
function MostProbableMissSpelling(word) {
  const candidates = [];
  return candidates;
}
/**
 * Build the character trigrams of a word.
 *
 * The original stub returned undefined; this yields every consecutive
 * 3-character substring, the n-gram unit the spell-correction model
 * described in docs/QUERY.md is built on. Words shorter than 3
 * characters produce no trigrams.
 *
 * @param {string} word - input word.
 * @returns {string[]} consecutive 3-character substrings, in order.
 */
function TriGrams(word) {
  const grams = [];
  for (let i = 0; i + 3 <= word.length; i++) {
    grams.push(word.slice(i, i + 3));
  }
  return grams;
}
/**
 * Build the character bigrams of a word.
 *
 * The original stub returned undefined; this yields every consecutive
 * 2-character substring. Words shorter than 2 characters produce no
 * bigrams.
 *
 * @param {string} word - input word.
 * @returns {string[]} consecutive 2-character substrings, in order.
 */
function BiGrams(word) {
  const grams = [];
  for (let i = 0; i + 2 <= word.length; i++) {
    grams.push(word.slice(i, i + 2));
  }
  return grams;
}
// Public surface of the n-gram spell-correction controller.
module.exports = {
Init,
MostProbableAlternateQueries,
MostProbableMissSpelling,
};

View File

@@ -5,6 +5,8 @@ const API = require('./routes/api.js');
const Database = require('./database/database.js');
const ngrams = require('./controllers/n-grams.js');
async function main() {
Config.Load();
@@ -21,6 +23,12 @@ async function main() {
Server.Listen(process.env.PORT);
API.Init();
await ngrams.Init();
ngrams.MostProbableAlternateQueries('brick 2x10x4');
ngrams.MostProbableAlternateQueries('lego star wars battlefront');
ngrams.MostProbableAlternateQueries('lego stor was s');
}
main();