mirror of
https://github.com/trailbaseio/trailbase.git
synced 2026-01-06 01:40:12 -06:00
Add a "coffee vector search" tutorial to the docs.
This commit is contained in:
@@ -25,7 +25,7 @@ export default defineConfig({
|
||||
},
|
||||
plugins: [
|
||||
starlightLinksValidator({
|
||||
exclude: ["http://localhost:4000/**/*"],
|
||||
exclude: ["http://localhost:4000/", "http://localhost:4000/**/*"],
|
||||
}),
|
||||
],
|
||||
sidebar: [
|
||||
@@ -37,8 +37,12 @@ export default defineConfig({
|
||||
slug: "getting-started/starting-up",
|
||||
},
|
||||
{
|
||||
label: "First App",
|
||||
slug: "getting-started/first-app",
|
||||
label: "First UI+TS App",
|
||||
slug: "getting-started/first-ui-app",
|
||||
},
|
||||
{
|
||||
label: "First CLI App",
|
||||
slug: "getting-started/first-cli-app",
|
||||
},
|
||||
{
|
||||
label: "Philosophy",
|
||||
|
||||
@@ -20,7 +20,7 @@ Note further, that the pool of workers/isolates does not share state, i.e. you
|
||||
cannot use global state to reliably share state across requests. You should
|
||||
rely on the database for persisting and sharing state.
|
||||
|
||||
## Http Endpoints
|
||||
## HTTP Endpoints
|
||||
|
||||
The following example illustrates a few things:
|
||||
|
||||
|
||||
@@ -1,22 +1,17 @@
|
||||
---
|
||||
title: First App
|
||||
description: A guide in my new Starlight docs site.
|
||||
title: First CLI App
|
||||
---
|
||||
|
||||
import { Code } from "@astrojs/starlight/components";
|
||||
import { Aside } from "@astrojs/starlight/components";
|
||||
|
||||
{/*
|
||||
import Readme from "../../../../../examples/tutorial/README.md";
|
||||
<Readme />
|
||||
*/}
|
||||
|
||||
In this tutorial, we'll set up a database with an IMDB test dataset, spin up
|
||||
TrailBase and write a small program to access the data.
|
||||
TrailBase and write a simple client CLI application to access the data using
|
||||
*Record APIs*.
|
||||
|
||||
In an effort to demonstrate TrailBase's loose coupling and the possibility of
|
||||
simply trying out TrailBase with an existing SQLite-based data analysis
|
||||
project, we will also offer a alternative path to bootstrapping the database
|
||||
project, we will also offer an alternative path to bootstrapping the database
|
||||
using the vanilla `sqlite3` CLI.
|
||||
|
||||
<Aside type="note" title="Running TrailBase">
|
||||
@@ -108,7 +103,7 @@ sqlite> SELECT COUNT(*) FROM movies;
|
||||
1000
|
||||
```
|
||||
|
||||
## Accessing the Data
|
||||
## Reading the Data
|
||||
|
||||
With TrailBase up and running (`trail run`), the easiest way to explore your
|
||||
data is to go to the admin dashboard under
|
||||
@@ -150,10 +145,10 @@ curl -g 'localhost:4000/api/records/v1/movies?limit=3&order=rank&watch_time[lt]=
|
||||
You can also use your browser. Either way, you should see some JSON output with
|
||||
the respective movies.
|
||||
|
||||
## Type-Safe APIs and Mutations
|
||||
## Writing Data with a Custom CLI
|
||||
|
||||
Finally, let's authenticate and use privileged APIs to first delete all movies
|
||||
and then add them pack using type-safe APIs rather than `sqlite3`.
|
||||
and then add them back using type-safe APIs rather than `sqlite3`.
|
||||
|
||||
Let's first create the JSON Schema type definitions from the database schema we
|
||||
added above. Note, that the type definition for creation, reading, and updating
|
||||
285
docs/src/content/docs/getting-started/first-ui-app.mdx
Normal file
285
docs/src/content/docs/getting-started/first-ui-app.mdx
Normal file
@@ -0,0 +1,285 @@
|
||||
---
|
||||
title: First UI + TypeScript App
|
||||
---
|
||||
|
||||
import screenshot from "../../../../../examples/coffeesearch/assets/screenshot.png";
|
||||
|
||||
In this tutorial, we'll set up a database with coffee data, implement a custom
|
||||
handler for vector search in TypeScript, and a simple web UI all in ~100 lines
|
||||
of code.
|
||||
|
||||
|
||||
<div class="flex justify-center">
|
||||
<div class="w-[80%] shadow-lg ">
|
||||

|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/*
|
||||
<div class="flex justify-center">
|
||||
<Image
|
||||
class="w-[80%] "
|
||||
src={screenshot}
|
||||
alt="Screenshot of Coffee Vector Search App"
|
||||
/>
|
||||
</div>
|
||||
*/}
|
||||
|
||||
## Importing the Data
|
||||
|
||||
The dataset lives under `/examples/coffeesearch/arabica_data_cleaned.csv`. We'll
|
||||
use the `sqlite3` CLI and the following SQL script to import the initial data:
|
||||
|
||||
```sql
|
||||
-- First create the strictly typed "coffee" table.
|
||||
CREATE TABLE IF NOT EXISTS coffee (
|
||||
Species TEXT,
|
||||
Owner TEXT,
|
||||
|
||||
Aroma REAL,
|
||||
Flavor REAL,
|
||||
Acidity REAL,
|
||||
Sweetness REAL,
|
||||
|
||||
embedding BLOB
|
||||
) STRICT;
|
||||
|
||||
-- Then import the data into a "temporary" table.
|
||||
.mode csv
|
||||
.import arabica_data_cleaned.csv temporary
|
||||
|
||||
-- Then import the temporary data into the "coffee" table.
|
||||
INSERT INTO coffee (Species, Owner, Aroma, Flavor, Acidity, Sweetness)
|
||||
SELECT
|
||||
Species,
|
||||
Owner,
|
||||
|
||||
CAST(Aroma AS REAL) AS Aroma,
|
||||
CAST(Flavor AS REAL) AS Flavor,
|
||||
CAST(Acidity AS REAL) AS Acidity,
|
||||
CAST(Sweetness AS REAL) AS Sweetness
|
||||
FROM temporary;
|
||||
|
||||
-- And clean up.
|
||||
DROP TABLE temporary;
|
||||
```
|
||||
|
||||
Note that we didn't initialize the vector `embedding`. This is because the
|
||||
`sqlite3` CLI doesn't have the necessary extension functions built-in.
|
||||
We'll update the entries to add the embedding later as part of a TrailBase
|
||||
migration.
|
||||
|
||||
With this script as `import.sql` and assuming `/examples/coffeesearch` is your
|
||||
current working directory, you can run:
|
||||
|
||||
```bash
|
||||
$ cat import.sql | sqlite3 traildepot/data/main.db -
|
||||
```
|
||||
|
||||
Now with the initial import, let's start TrailBase for the first time. This
|
||||
will apply the following migration under
|
||||
`/examples/coffeesearch/traildepot/migrations`, which is basically:
|
||||
|
||||
```sql
|
||||
UPDATE coffee SET embedding = VECTOR(FORMAT("[%f, %f, %f, %f]", Aroma, Flavor, Acidity, Sweetness))
|
||||
```
|
||||
|
||||
to initialize the vector embeddings from our different coffee notes. So we run:
|
||||
|
||||
```bash
|
||||
$ trail run --dev
|
||||
```
|
||||
|
||||
If the server comes up successfully you've done everything correctly.
|
||||
|
||||
## A JS/TS Vector Search Handler
|
||||
|
||||
Now we need a custom API endpoint our UI can call later to look up which coffee
|
||||
most closely resembles our requested combination of notes.
|
||||
For that we place the following definition into
|
||||
`/examples/coffeesearch/traildepot/scripts/index.ts`:
|
||||
|
||||
```ts
|
||||
import {
|
||||
addRoute,
|
||||
jsonHandler,
|
||||
parsePath,
|
||||
query
|
||||
} from "../trailbase.js";
|
||||
|
||||
addRoute(
|
||||
"GET",
|
||||
"/search",
|
||||
jsonHandler(async (req) => {
|
||||
const searchParams = parsePath(req.uri).query;
|
||||
|
||||
// Query parameters are untrusted strings. Coerce each one to a finite number
|
||||
// (falling back to 8) so only numeric text is ever interpolated into the SQL
|
||||
// below; raw strings would allow SQL injection.
|
||||
const note = (name: string): number => {
|
||||
const value = Number.parseFloat(searchParams?.get(name) ?? "");
|
||||
return Number.isFinite(value) ? value : 8;
|
||||
};
|
||||
|
||||
const aroma = note("aroma");
|
||||
const flavor = note("flavor");
|
||||
const acidity = note("acidity");
|
||||
const sweetness = note("sweetness");
|
||||
|
||||
// Rank coffees by cosine distance between their stored embedding and the
|
||||
// requested combination of notes; closest matches come first.
|
||||
return await query(`
|
||||
SELECT
|
||||
Owner,
|
||||
Aroma,
|
||||
Flavor,
|
||||
Acidity,
|
||||
Sweetness,
|
||||
vector_distance_cos(
|
||||
embedding,
|
||||
'[${aroma}, ${flavor}, ${acidity}, ${sweetness}]'
|
||||
) AS distance
|
||||
FROM
|
||||
coffee
|
||||
WHERE
|
||||
embedding IS NOT NULL AND distance < 0.2
|
||||
ORDER BY
|
||||
distance
|
||||
LIMIT 100`, []);
|
||||
}),
|
||||
);
|
||||
```
|
||||
|
||||
This custom handler will let us query the coffees that most closely resemble
|
||||
our desired combination of notes, e.g.:
|
||||
|
||||
```bash
|
||||
$ curl "http://localhost:4000/search?aroma=8&flavor=8&acidity=8&sweetness=8"
|
||||
[
|
||||
["juan luis alvarado romero",7.92,7.58,7.58,8,0.0003054438275285065],
|
||||
["eileen koyanagi",7.5,7.33,7.58,8,0.000525727984495461],
|
||||
...
|
||||
]
|
||||
```
|
||||
|
||||
If we're only interested in the API, we're basically done. Otherwise, the
|
||||
following section will implement a simple React web App.
|
||||
|
||||
## A simple Web UI
|
||||
|
||||
We recommend [vite](https://vite.dev/guide/) for setting up a simple SPA, e.g.:
|
||||
|
||||
```bash
|
||||
$ pnpm create vite@latest my-project -- --template react
|
||||
```
|
||||
|
||||
We can then swap out the template with `my-project/src/App.tsx`:
|
||||
|
||||
```tsx
|
||||
import { useState, useEffect } from "react";
|
||||
import "./App.css";
|
||||
|
||||
type Data = Array<Array<object>>;
|
||||
|
||||
async function fetchData(v: {
|
||||
aroma: number;
|
||||
flavor: number;
|
||||
acidity: number;
|
||||
sweetness: number;
|
||||
}): Promise<Data> {
|
||||
const URL = import.meta.env.DEV ? "http://localhost:4000" : "";
|
||||
const params = Object.entries(v).map(([k, v]) => `${k}=${v}`).join("&");
|
||||
const response = await fetch(`${URL}/search?${params}`);
|
||||
return await response.json();
|
||||
}
|
||||
|
||||
const Input = (props: {
|
||||
label: string;
|
||||
value: number;
|
||||
update: (v: number) => void;
|
||||
}) => (
|
||||
<>
|
||||
<label>{props.label}:</label>
|
||||
<input
|
||||
type="number"
|
||||
step="0.1"
|
||||
value={props.value}
|
||||
onChange={(el) => props.update(el.target.valueAsNumber)}
|
||||
/>
|
||||
</>
|
||||
);
|
||||
|
||||
const Row = (props: { row: Array<object> }) => (
|
||||
<tr>
|
||||
{props.row.map((d) => (
|
||||
<td>{`${d}`}</td>
|
||||
))}
|
||||
</tr>
|
||||
);
|
||||
|
||||
function Table() {
|
||||
const [aroma, setAroma] = useState(8);
|
||||
const [flavor, setFlavor] = useState(8);
|
||||
const [acidity, setAcidity] = useState(8);
|
||||
const [sweetness, setSweetness] = useState(8);
|
||||
|
||||
const [data, setData] = useState<Data | undefined>();
|
||||
useEffect(() => {
|
||||
setData(undefined);
|
||||
fetchData({ aroma, flavor, acidity, sweetness }).then(setData);
|
||||
}, [aroma, flavor, acidity, sweetness]);
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className="inputs">
|
||||
<Input label="Aroma" value={aroma} update={setAroma} />
|
||||
<Input label="Flavor" value={flavor} update={setFlavor} />
|
||||
<Input label="Acidity" value={acidity} update={setAcidity} />
|
||||
<Input label="Sweetness" value={sweetness} update={setSweetness} />
|
||||
</div>
|
||||
|
||||
<div className="table">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col">Owner</th>
|
||||
<th scope="col">Aroma</th>
|
||||
<th scope="col">Flavor</th>
|
||||
<th scope="col">Acidity</th>
|
||||
<th scope="col">Sweetness</th>
|
||||
</tr>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
{(data ?? []).map((row) => (
|
||||
<Row row={row} />
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
export const App = () => (
|
||||
<>
|
||||
<h1>Coffee Search</h1>
|
||||
<Table />
|
||||
</>
|
||||
);
|
||||
```
|
||||
|
||||
Lastly we need to compile our `JSX/TSX` down into pure HTML, JS, and CSS our
|
||||
browser can understand. After running
|
||||
|
||||
```bash
|
||||
pnpm build
|
||||
```
|
||||
|
||||
the resulting built artifacts can be found under `my-project/dist/`.
|
||||
|
||||
### Putting Everything Together
|
||||
|
||||
Whether you've followed along or skipped to here, you can now start TrailBase:
|
||||
|
||||
* Pointing at our prepared database
|
||||
* Providing our custom `/search` endpoint
|
||||
* And serving our web artifacts in `dist/` (if you haven't built the website yet
|
||||
run `pnpm build`)
|
||||
|
||||
```bash
|
||||
trail run --public-dir <path-to-ui>/dist
|
||||
```
|
||||
|
||||
You can now browse to your [new custom UI](http://localhost:4000/) or check out the
|
||||
[admin dashboard](http://localhost:4000/_/admin).
|
||||
@@ -96,7 +96,7 @@ You can simply import and export data with standard SQLite tooling, e.g.:
|
||||
sqlite3 main.db < import.sql
|
||||
```
|
||||
|
||||
Also check out the [getting started](/getting-started/first-app) guide.
|
||||
Also check out the [getting started](/getting-started/first-cli-app) guide.
|
||||
|
||||
<div class="h-[50px]" />
|
||||
|
||||
|
||||
@@ -4,6 +4,6 @@ app: dist
|
||||
pnpm build
|
||||
|
||||
init:
|
||||
mkdir -p traildepot/data; cat import.sql | sqlite3 traildepot/data/main.db -
|
||||
rm -rf traildepot/data && mkdir -p traildepot/data && cat import.sql | sqlite3 traildepot/data/main.db -
|
||||
|
||||
.PHONY: init
|
||||
|
||||
BIN
examples/coffeesearch/assets/screenshot.png
Normal file
BIN
examples/coffeesearch/assets/screenshot.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 113 KiB |
@@ -11,9 +11,6 @@ CREATE TABLE IF NOT EXISTS coffee (
|
||||
embedding BLOB
|
||||
) STRICT;
|
||||
|
||||
-- Empty table for clean import.
|
||||
DELETE FROM coffee;
|
||||
|
||||
-- Go on to import data.
|
||||
DROP TABLE IF EXISTS temporary;
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ import "./App.css";
|
||||
|
||||
type Data = Array<Array<object>>;
|
||||
|
||||
async function getData(v: {
|
||||
async function fetchData(v: {
|
||||
aroma: number;
|
||||
flavor: number;
|
||||
acidity: number;
|
||||
@@ -17,23 +17,29 @@ async function getData(v: {
|
||||
return await response.json();
|
||||
}
|
||||
|
||||
function Input(props: {
|
||||
const Input = (props: {
|
||||
label: string;
|
||||
value: number;
|
||||
update: (v: number) => void;
|
||||
}) {
|
||||
return (
|
||||
<>
|
||||
<label>{props.label}:</label>
|
||||
<input
|
||||
type="number"
|
||||
step="0.1"
|
||||
value={props.value}
|
||||
onChange={(el) => props.update(el.target.valueAsNumber)}
|
||||
/>
|
||||
</>
|
||||
);
|
||||
}
|
||||
}) => (
|
||||
<>
|
||||
<label>{props.label}:</label>
|
||||
<input
|
||||
type="number"
|
||||
step="0.1"
|
||||
value={props.value}
|
||||
onChange={(el) => props.update(el.target.valueAsNumber)}
|
||||
/>
|
||||
</>
|
||||
);
|
||||
|
||||
const Row = (props: { row: Array<object> }) => (
|
||||
<tr>
|
||||
{props.row.map((d) => (
|
||||
<td>{`${typeof d === "number" ? (d as number).toPrecision(3) : d}`}</td>
|
||||
))}
|
||||
</tr>
|
||||
);
|
||||
|
||||
function Table() {
|
||||
const [aroma, setAroma] = useState(8);
|
||||
@@ -44,19 +50,9 @@ function Table() {
|
||||
const [data, setData] = useState<Data | undefined>();
|
||||
useEffect(() => {
|
||||
setData(undefined);
|
||||
getData({ aroma, flavor, acidity, sweetness }).then((data) =>
|
||||
setData(data),
|
||||
);
|
||||
fetchData({ aroma, flavor, acidity, sweetness }).then(setData);
|
||||
}, [aroma, flavor, acidity, sweetness]);
|
||||
|
||||
const Row = (props: { row: Array<object> }) => (
|
||||
<tr>
|
||||
{props.row.map((d) => (
|
||||
<td>{`${d}`}</td>
|
||||
))}
|
||||
</tr>
|
||||
);
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className="inputs">
|
||||
|
||||
@@ -22,14 +22,16 @@ auth {
|
||||
auth_token_ttl_sec: 120
|
||||
refresh_token_ttl_sec: 2592000
|
||||
}
|
||||
record_apis: [{
|
||||
name: "_user_avatar"
|
||||
table_name: "_user_avatar"
|
||||
conflict_resolution: REPLACE
|
||||
autofill_missing_user_id_columns: true
|
||||
acl_world: [READ]
|
||||
acl_authenticated: [CREATE, READ, UPDATE, DELETE]
|
||||
create_access_rule: "_REQ_.user IS NULL OR _REQ_.user = _USER_.id"
|
||||
update_access_rule: "_ROW_.user = _USER_.id"
|
||||
delete_access_rule: "_ROW_.user = _USER_.id"
|
||||
}]
|
||||
record_apis: [
|
||||
{
|
||||
name: "_user_avatar"
|
||||
table_name: "_user_avatar"
|
||||
conflict_resolution: REPLACE
|
||||
autofill_missing_user_id_columns: true
|
||||
acl_world: [READ]
|
||||
acl_authenticated: [CREATE, READ, UPDATE, DELETE]
|
||||
create_access_rule: "_REQ_.user IS NULL OR _REQ_.user = _USER_.id"
|
||||
update_access_rule: "_ROW_.user = _USER_.id"
|
||||
delete_access_rule: "_ROW_.user = _USER_.id"
|
||||
}
|
||||
]
|
||||
|
||||
@@ -11,3 +11,8 @@ CREATE TABLE IF NOT EXISTS coffee (
|
||||
) STRICT;
|
||||
|
||||
UPDATE coffee SET embedding = VECTOR(FORMAT("[%f, %f, %f, %f]", Aroma, Flavor, Acidity, Sweetness));
|
||||
|
||||
-- Compute the embedding for every newly inserted row. INSERT triggers may only reference NEW (OLD is undefined for inserts).
|
||||
CREATE TRIGGER _coffee__updated_trigger AFTER INSERT ON coffee FOR EACH ROW
|
||||
BEGIN
|
||||
UPDATE coffee SET embedding = VECTOR(FORMAT("[%f, %f, %f, %f]", Aroma, Flavor, Acidity, Sweetness)) WHERE _rowid_ = NEW._rowid_;
|
||||
END;
|
||||
|
||||
@@ -15,13 +15,18 @@ addRoute(
|
||||
return await query(
|
||||
`
|
||||
SELECT
|
||||
Owner, Aroma, Flavor, Acidity, Sweetness
|
||||
Owner,
|
||||
Aroma,
|
||||
Flavor,
|
||||
Acidity,
|
||||
Sweetness,
|
||||
vector_distance_cos(embedding, '[${aroma}, ${flavor}, ${acidity}, ${sweetness}]') AS distance
|
||||
FROM
|
||||
coffee
|
||||
WHERE
|
||||
embedding IS NOT NULL
|
||||
embedding IS NOT NULL AND distance < 0.2
|
||||
ORDER BY
|
||||
vector_distance_cos(embedding, '[${aroma}, ${flavor}, ${acidity}, ${sweetness}]')
|
||||
distance
|
||||
LIMIT 100
|
||||
`,
|
||||
[],
|
||||
|
||||
@@ -23,6 +23,7 @@ export type ResponseType = {
|
||||
};
|
||||
export type MaybeResponse<T> = Promise<T | undefined> | T | undefined;
|
||||
export type CallbackType = (req: RequestType) => MaybeResponse<ResponseType>;
|
||||
export type Method = "DELETE" | "GET" | "HEAD" | "OPTIONS" | "PATCH" | "POST" | "PUT" | "TRACE";
|
||||
export declare enum StatusCodes {
|
||||
CONTINUE = 100,
|
||||
SWITCHING_PROTOCOLS = 101,
|
||||
@@ -122,8 +123,8 @@ export interface JsonResponseType {
|
||||
body: object;
|
||||
}
|
||||
export declare function jsonHandler(f: (req: JsonRequestType) => MaybeResponse<JsonRequestType | object>): CallbackType;
|
||||
export declare function addRoute(method: string, route: string, callback: CallbackType): void;
|
||||
export declare function dispatch(method: string, route: string, uri: string, pathParams: [string, string][], headers: [string, string][], user: UserType | undefined, body: Uint8Array): Promise<ResponseType>;
|
||||
export declare function addRoute(method: Method, route: string, callback: CallbackType): void;
|
||||
export declare function dispatch(method: Method, route: string, uri: string, pathParams: [string, string][], headers: [string, string][], user: UserType | undefined, body: Uint8Array): Promise<ResponseType>;
|
||||
export declare function query(queryStr: string, params: unknown[]): Promise<unknown[][]>;
|
||||
export declare function execute(queryStr: string, params: unknown[]): Promise<number>;
|
||||
export type ParsedPath = {
|
||||
|
||||
Reference in New Issue
Block a user