bigrams shmigrams
Former-commit-id: 7947042da92a0c541faa499020d0ff72fd054bf2
This commit is contained in:
78
docs/API.md
78
docs/API.md
@@ -20,14 +20,10 @@ automatically every request
|
||||
| POST | /api/bulk/brick | array | no | POST due to bulk nature |
|
||||
| GET | /api/set/:id | | no | |
|
||||
| GET | /api/cdn/:id | | no | |
|
||||
| GET | /api/basket/price/ | | no | |
|
||||
| PUT | /api/auth/login/ | | yes | |
|
||||
| POST | /api/auth/signup/ | | yes | |
|
||||
| GET | /api/auth/orders/ | | yes | |
|
||||
| GET | /api/auth/basket/ | | yes | |
|
||||
| PUT | /api/auth/basket/:id | quantity | yes | |
|
||||
| POST | /api/auth/basket/:id | | yes | manipulate basket content |
|
||||
| DEL | /api/auth/basket/:id | quantity | yes | if no id, delete whole |
|
||||
| DEL | /api/auth/basket/ | | yes | if no id, delete whole |
|
||||
|
||||
Query endpoints do not return the full data on a brick/set, they return
|
||||
a subset for product listing pages
|
||||
@@ -52,78 +48,6 @@ brick: brick to search for (absolute type, fuzzy string)
|
||||
|
||||
set: set to search for (absolute, fuzzy string)
|
||||
|
||||
### /api/special/
|
||||
|
||||
GET /api/special/
|
||||
|
||||
Response Object
|
||||
```json
|
||||
{
|
||||
"data": {
|
||||
"title": "Special 1",
|
||||
"end": "2020-01-31T00:00:00.000Z",
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### /api/type/:id
|
||||
|
||||
GET /api/type/:id
|
||||
|
||||
Response Object
|
||||
```json
|
||||
{
|
||||
"data": {
|
||||
"type": "brick", // or set
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### /api/search/
|
||||
|
||||
GET /api/search?params
|
||||
|
||||
### /api/bricks/
|
||||
|
||||
GET
|
||||
|
||||
Response Object
|
||||
```json
|
||||
{
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
### /api/sets/
|
||||
### /api/brick/:id/
|
||||
|
||||
|
||||
|
||||
### /api/set/:id/
|
||||
|
||||
|
||||
|
||||
### /api/cdn/:id/
|
||||
### /api/auth/login/
|
||||
### /api/auth/signup/
|
||||
|
||||
Request Body
|
||||
```json
|
||||
{
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
Response Object
|
||||
```json
|
||||
{
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
### /api/auth/orders/
|
||||
### /api/auth/basket/
|
||||
|
||||
## Response Structure
|
||||
|
||||
```js
|
||||
|
||||
24
docs/QUERY.md
Normal file
24
docs/QUERY.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Query
|
||||
|
||||
In order to quickly and accurately query my database for hand-written
|
||||
searches, I need a way to perform a "fuzzy" search on the data at
|
||||
hand.
|
||||
|
||||
I had previously created a simple algorithm to search a database with
|
||||
a basic POSIX regex filter and then rank the results by their distance to the
|
||||
input query, which was ordered based on Levenshtein Distance.
|
||||
|
||||
Keeping to the theme of querying a large, static data set for
|
||||
multiple known attributes (name, tags, id), I decided to stick with
|
||||
the distance function for result relevancy but lean into automatic
|
||||
spell correction with an n-gram model, proposing to the controller,
|
||||
"the most probable few alternate queries" that the user might have
|
||||
meant, which can then be queried in the database to go through the
|
||||
same relevancy distance sorting.
|
||||
|
||||
## What I didn't have time for
|
||||
|
||||
My end goal was to create a gradient boosted decision tree which would
|
||||
correct spelling based on a trained data set of word frequency, n-gram
|
||||
modeling, distance between words and also a static dictionary. However,
|
||||
this proved out of scope.
|
||||
@@ -1,2 +0,0 @@
|
||||
|
||||
|
||||
43
src/controllers/n-grams.js
Normal file
43
src/controllers/n-grams.js
Normal file
@@ -0,0 +1,43 @@
|
||||
const axios = require("axios");
|
||||
|
||||
let StaticDictionary = [];
|
||||
|
||||
/**
 * Downloads the remote lowercase word list and caches it in the module-level
 * `StaticDictionary` as an array of words.
 *
 * The previous version stored the raw response body, which replaced the
 * array-typed `StaticDictionary` with a single string.
 *
 * @returns {Promise<void>} Resolves once the dictionary has been populated.
 * @throws Propagates any rejection from the HTTP request to the caller.
 */
async function Init() {
  const response = await axios.get('http://www.mieliestronk.com/corncob_lowercase.txt');

  // NOTE(review): assumes the list is plain text with one word per line
  // (either LF or CRLF line endings) - confirm against the served file.
  StaticDictionary = String(response.data)
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}
|
||||
|
||||
/**
 * Collects, for each space-separated word of the query, a copy of the
 * candidate list produced by `MostProbableMissSpelling`, then logs the
 * collected lists to the console.
 *
 * @param {string} query - Search text; it is split on single space characters.
 * @returns {undefined} Nothing is returned; the per-word lists are only logged.
 */
function MostProbableAlternateQueries(query) {
  const perWordCandidates = query
    .split(' ')
    .map((token) => [...MostProbableMissSpelling(token)]);

  console.log(perWordCandidates)

  // work out a bit of context to determine the most likely sentence
}
|
||||
|
||||
/**
 * Placeholder for per-word spelling correction: at present it produces no
 * candidates, whatever word it is given.
 *
 * @param {string} word - Word to correct (currently unused).
 * @returns {Array} A new, empty array on every call.
 */
function MostProbableMissSpelling(word) {
  const candidates = [];
  return candidates;
}
|
||||
|
||||
/**
 * Splits a word into its overlapping character trigrams, in order of
 * appearance. No boundary padding is added.
 *
 * Example: "brick" -> ["bri", "ric", "ick"].
 *
 * @param {string} word - Word to break into trigrams.
 * @returns {string[]} The trigrams; empty when `word` is not a string or has
 *   fewer than three characters.
 */
function TriGrams(word) {
  if (typeof word !== 'string') {
    return [];
  }

  // Iterate by code point so surrogate pairs are never cut in half.
  const chars = Array.from(word);
  const grams = [];
  for (let start = 0; start + 3 <= chars.length; start++) {
    grams.push(chars.slice(start, start + 3).join(''));
  }
  return grams;
}
|
||||
|
||||
/**
 * Splits a word into its overlapping character bigrams, in order of
 * appearance. No boundary padding is added.
 *
 * Example: "lego" -> ["le", "eg", "go"].
 *
 * @param {string} word - Word to break into bigrams.
 * @returns {string[]} The bigrams; empty when `word` is not a string or has
 *   fewer than two characters.
 */
function BiGrams(word) {
  if (typeof word !== 'string') {
    return [];
  }

  // Iterate by code point so surrogate pairs are never cut in half.
  const chars = Array.from(word);
  const grams = [];
  for (let start = 0; start + 2 <= chars.length; start++) {
    grams.push(chars.slice(start, start + 2).join(''));
  }
  return grams;
}
|
||||
|
||||
|
||||
// Functions this controller exposes to the rest of the application.
const publicApi = { Init, MostProbableAlternateQueries, MostProbableMissSpelling };

module.exports = publicApi;
|
||||
@@ -5,6 +5,8 @@ const API = require('./routes/api.js');
|
||||
|
||||
const Database = require('./database/database.js');
|
||||
|
||||
const ngrams = require('./controllers/n-grams.js');
|
||||
|
||||
async function main() {
|
||||
Config.Load();
|
||||
|
||||
@@ -21,6 +23,12 @@ async function main() {
|
||||
|
||||
Server.Listen(process.env.PORT);
|
||||
API.Init();
|
||||
|
||||
await ngrams.Init();
|
||||
|
||||
ngrams.MostProbableAlternateQueries('brick 2x10x4');
|
||||
ngrams.MostProbableAlternateQueries('lego star wars battlefront');
|
||||
ngrams.MostProbableAlternateQueries('lego stor was s');
|
||||
}
|
||||
|
||||
main();
|
||||
|
||||
Reference in New Issue
Block a user