dbt models

This commit is contained in:
Dan Corley
2025-04-11 23:36:43 -05:00
parent 394bd86bfd
commit 071140a67d
23 changed files with 591 additions and 1 deletions

41
duck_dbt/.sqlfluff Normal file
View File

@@ -0,0 +1,41 @@
# SQLFluff settings for the dbt project: DuckDB dialect, SQL rendered through the dbt templater.
[sqlfluff]
templater = dbt
dialect = duckdb
nocolor = True
max_line_length = 120
large_file_skip_byte_limit = 36000
# Rules disabled project-wide (legacy L-codes plus CV11).
exclude_rules = L009, L031, L034, L022, L050, L051, L032, CV11

# Two-space indentation.
[sqlfluff:indentation]
tab_space_size = 2
indent_unit = space
indented_on_contents = False

# Binary operators and commas both lead the line.
[sqlfluff:layout:type:binary_operator]
spacing_within = touch
line_position = leading

[sqlfluff:layout:type:comma]
spacing_before = touch
line_position = leading

# Keywords are written in lower case.
[sqlfluff:rules:capitalisation.keywords]
capitalisation_policy = lower

# tables
[sqlfluff:rules:aliasing.table]
aliasing = explicit

# columns
[sqlfluff:rules:aliasing.column]
aliasing = explicit

# aliases
[sqlfluff:rules:capitalisation.identifiers]
extended_capitalisation_policy = lower

4
duck_dbt/.sqlfluffignore Normal file
View File

@@ -0,0 +1,4 @@
# Paths SQLFluff must not lint.
docker/
data/
metabase_db/
# dbt build artefacts and installed packages (both listed under clean-targets in dbt_project.yml).
target/
dbt_packages/
# Macro files are Jinja definitions rather than lintable models when using the dbt templater.
macros/

1
duck_dbt/.user.yml Normal file
View File

@@ -0,0 +1 @@
id: afe80c86-f1f2-4294-b8f6-f2ac2cb7adc1

View File

@@ -0,0 +1,10 @@
# Metabase sync settings (key names match the dbt-metabase config file format — confirm the tool version).
config:
  # The pipeline runs dbt with `--project-dir duck_dbt`, so the manifest is written under
  # duck_dbt/target rather than dbt/target.
  # NOTE(review): this path is relative to the directory the sync is launched from — confirm it is the repo root.
  manifest_path: duck_dbt/target/manifest.json
  metabase_url: http://localhost:3000
  # Placeholder credentials: replace locally and never commit real values.
  metabase_username: modify@this_email.com
  metabase_password: super_secret_password
  metabase_use_http: true
  http_timeout: 600
  skip_sources: true
  # Options specific to the `models` command.
  models:
    metabase_database: "duckdb"

22
duck_dbt/dbt_project.yml Normal file
View File

@@ -0,0 +1,22 @@
# dbt project definition for the Orange Theory models.
name: "dbt_orange_theory"
version: "1.0.0"
config-version: 2

# Name of the connection profile to use (looked up in profiles.yml).
profile: "dbt_orange_theory"

# Directories dbt scans for each resource type.
model-paths:
  - "models"
analysis-paths:
  - "analyses"
test-paths:
  - "tests"
seed-paths:
  - "seeds"
macro-paths:
  - "macros"
snapshot-paths:
  - "snapshots"

# Directories removed by `dbt clean`.
clean-targets: ["target", "dbt_packages"]

models:
  dbt_orange_theory:
    # Every model in this project is configured with the custom schema `source_models`.
    +schema: source_models
    +docs:
      node_color: silver

View File

@@ -0,0 +1,11 @@
{#
  Override of dbt's schema-name resolution.

  Arguments:
    custom_schema_name: the `schema` configured on the node, or none when no custom schema is set.
    node: the node being resolved (unused here; kept to match dbt's expected signature).

  Returns the custom schema name on its own (trimmed, with no target-schema prefix) when one is
  configured; otherwise falls back to the schema of the active target.
#}
{% macro generate_schema_name(custom_schema_name, node) -%}
  {#- Without this assignment `default_schema` is undefined and the fallback renders an empty name. -#}
  {%- set default_schema = target.schema -%}
  {%- if custom_schema_name is not none -%}
    {{ custom_schema_name | trim }}
  {%- else -%}
    {{ default_schema }}
  {%- endif -%}
{%- endmacro %}

View File

@@ -0,0 +1,29 @@
-- One row per booking and heart-rate zone, with the minutes spent in that zone and the
-- share of the booking's total zone minutes that it represents.

-- Per-booking zone minutes, renamed to bare zone names so they become the unpivoted labels.
with zones_cte as (
  select
    booking_id
    , zone_time_minutes__gray as gray
    , zone_time_minutes__blue as blue
    , zone_time_minutes__green as green
    , zone_time_minutes__orange as orange
    , zone_time_minutes__red as red
  from
    {{ ref('performance_summaries') }}
)

-- Turns the five zone columns into (zone, minutes_in_zone) rows.
-- NOTE(review): DuckDB's UNPIVOT omits NULL values unless INCLUDE NULLS is given — confirm
-- that zones with NULL minutes should be left out.
, unpivot_cte as (
  unpivot zones_cte
  on gray, blue, green, orange, red
  into
    name zone
    value minutes_in_zone
)

select
  booking_id
  , zone
  , minutes_in_zone
  -- Despite the name this is a ratio (0-1), not a value scaled to 100.
  -- nullif guards bookings whose zone minutes total zero: the result is NULL instead of a
  -- division by zero.
  , minutes_in_zone / nullif(
    sum(minutes_in_zone) over (
      partition by booking_id
    )
    , 0
  ) as percent_minutes_in_zone
from
  unpivot_cte

View File

@@ -0,0 +1,8 @@
-- Pass-through of the dlt load log: one selected row per source row, columns unchanged.
select
  loads.load_id
  , loads.schema_name
  , loads.status
  , loads.inserted_at
  , loads.schema_version_hash
from
  {{ source('dlt_metadata', '_dlt_loads') }} as loads

View File

@@ -0,0 +1,10 @@
-- Pass-through of the dlt pipeline state table, columns unchanged.
select
  pipeline_states.version
  , pipeline_states.engine_version
  , pipeline_states.pipeline_name
  , pipeline_states.state
  , pipeline_states.created_at
  , pipeline_states.version_hash
  , pipeline_states._dlt_load_id
from
  {{ source('dlt_metadata', '_dlt_pipeline_state') }} as pipeline_states

View File

@@ -0,0 +1,9 @@
-- Pass-through of the dlt schema version table, columns unchanged.
select
  schema_versions.version_hash
  , schema_versions.schema_name
  , schema_versions.version
  , schema_versions.engine_version
  , schema_versions.inserted_at
  , schema_versions.schema
from
  {{ source('dlt_metadata', '_dlt_version') }} as schema_versions

View File

@@ -0,0 +1,83 @@
# Model properties: reusable column definitions (YAML anchors) plus the data tests for each model.
anchors: #https://www.educative.io/blog/advanced-yaml-syntax-cheatsheet#anchors
  # member_uuid must exist in the `me` model.
  - anchor: &member_ref
      name: member_uuid
      data_tests:
        - relationships:
            to: ref('me')
            field: member_uuid
  # booking_id must exist in the `bookings` model.
  - anchor: &booking_ref
      name: booking_id
      data_tests:
        - relationships:
            to: ref('bookings')
            field: booking_id
  # _dlt_load_id must exist as load_id in the `dlt_loads` model.
  - anchor: &dlt_load_id
      name: _dlt_load_id
      data_tests:
        - relationships:
            to: ref('dlt_loads')
            field: load_id

models:
  - name: body_composition
    columns:
      - *member_ref
      - *dlt_load_id

  - name: bookings
    columns:
      - *member_ref
      - *dlt_load_id
      # booking_id is the target of the booking_ref relationships tests, so it is held to the
      # same not_null/unique standard as the other referenced keys in this file.
      - name: booking_id
        data_tests:
          - not_null
          - unique

  - name: heart_rate
    columns:
      - *member_ref
      - *dlt_load_id
      - name: dlt_record_id
        data_tests:
          - not_null
          - unique

  - name: heart_rate__history
    columns:
      - name: dlt_parent_id
        data_tests:
          - relationships:
              to: ref('heart_rate')
              field: dlt_record_id

  - name: me
    columns:
      - *dlt_load_id
      - name: member_uuid
        data_tests:
          - not_null
          - unique

  - name: performance_summaries
    columns:
      - *dlt_load_id
      - *booking_ref

  - name: telemetry
    columns:
      - *dlt_load_id
      - *booking_ref
      - name: dlt_record_id
        data_tests:
          - not_null
          - unique

  - name: telemetry__telemetry
    columns:
      - name: dlt_parent_id
        data_tests:
          - relationships:
              to: ref('telemetry')
              field: dlt_record_id

  - name: zone_summary
    columns:
      - *booking_ref

  - name: dlt_loads
    columns:
      - name: load_id
        data_tests:
          - not_null
          - unique

  - name: dlt_pipeline_state

  - name: dlt_version

View File

@@ -0,0 +1,83 @@
-- Body composition scan results, one selected row per source row.
-- Only the two `*_uu_id` identifiers are renamed (to `*_uuid`); every other column passes through.
select
  scan.scan_result_uu_id as scan_result_uuid
  , scan.member_uu_id as member_uuid
  , scan.member_id
  , scan.email
  , scan.height
  , scan.gender
  , scan.age
  , scan.test_datetime
  , scan.weight
  , scan.tbw
  , scan.icw
  , scan.ecw
  , scan.dlm
  , scan.bfm
  , scan.lbm
  , scan.smm
  , scan.bmi
  , scan.pbf
  , scan.lbm_of_right_arm
  , scan.lbm_percent_of_right_arm
  , scan.lbm_of_left_arm
  , scan.lbm_percent_of_left_arm
  , scan.lbm_of_trunk
  , scan.lbm_percent_of_trunk
  , scan.lbm_of_right_leg
  , scan.lbm_percent_of_right_leg
  , scan.lbm_of_left_leg
  , scan.lbm_percent_of_left_leg
  , scan.tbw_of_right_arm
  , scan.tbw_of_left_arm
  , scan.tbw_of_trunk
  , scan.tbw_of_right_leg
  , scan.tbw_of_left_leg
  , scan.icw_of_right_arm
  , scan.icw_of_left_arm
  , scan.icw_of_trunk
  , scan.icw_of_right_leg
  , scan.icw_of_left_leg
  , scan.ecw_of_right_arm
  , scan.ecw_of_left_arm
  , scan.ecw_of_trunk
  , scan.ecw_of_right_leg
  , scan.ecw_of_left_leg
  , scan.ecw_over_tbw
  , scan.ecw_over_tbw_of_right_arm
  , scan.ecw_over_tbw_of_left_arm
  , scan.ecw_over_tbw_of_trunk
  , scan.ecw_over_tbw_of_right_leg
  , scan.ecw_over_tbw_of_left_leg
  , scan.bfm_of_right_arm
  , scan.bfm_percent_of_right_arm
  , scan.bfm_of_left_arm
  , scan.bfm_percent_of_left_arm
  , scan.bfm_of_trunk
  , scan.bfm_percent_of_trunk
  , scan.bfm_of_right_leg
  , scan.bfm_percent_of_right_leg
  , scan.bfm_of_left_leg
  , scan.bfm_percent_of_left_leg
  , scan.bfm_control
  , scan.lbm_control
  , scan.bmr
  , scan.vfl
  , scan.vfa
  , scan.bcm
  , scan.tbw_over_lbm
  , scan.in_body_type
  , scan.wt_graph_scale
  , scan.smm_graph_scale
  , scan.bfm_graph_scale
  , scan.pwt
  , scan.psmm
  , scan.pfatnew
  , scan.bfm__v_double
  , scan.lbm_percent_of_right_arm__v_double
  , scan.lbm_percent_of_trunk__v_double
  , scan.lbm_percent_of_right_leg__v_double
  , scan.lbm_of_left_leg__v_double
  , scan._dlt_load_id
  , scan._dlt_id
from
  {{ source('orange_theory_delta', 'body_composition') }} as scan

View File

@@ -0,0 +1,58 @@
-- Class bookings, one selected row per source row.
-- Renames: id -> booking_id, member_id -> member_uuid, workout__id -> class_history_uuid.
-- All other columns pass through under their source names.
select
  booking.id as booking_id
  , booking.paying_studio_id
  , booking.person_id
  , booking.member_id as member_uuid
  , booking.service_name
  , booking.checked_in
  , booking.cross_regional
  , booking.late_canceled
  , booking.intro
  , booking.mbo_booking_id
  , booking.mbo_unique_id
  , booking.mbo_paying_unique_id
  , booking.canceled
  , booking.created_at
  , booking.updated_at
  , booking.ratable
  , booking.workout__id as class_history_uuid
  , booking.workout__calories_burned
  , booking.workout__splat_points
  , booking.workout__step_count
  , booking.workout__active_time_seconds
  , booking.workout__zone_time_minutes__gray
  , booking.workout__zone_time_minutes__blue
  , booking.workout__zone_time_minutes__green
  , booking.workout__zone_time_minutes__orange
  , booking.workout__zone_time_minutes__red
  , booking.class__id
  , booking.class__name
  , booking.class__ot_base_class_uuid
  , booking.class__type
  , booking.class__starts_at_local
  , booking.class__starts_at
  , booking.class__studio__id
  , booking.class__studio__name
  , booking.class__studio__mbo_studio_id
  , booking.class__studio__time_zone
  , booking.class__studio__email
  , booking.class__studio__address__line1
  , booking.class__studio__address__city
  , booking.class__studio__address__state
  , booking.class__studio__address__country
  , booking.class__studio__address__postal_code
  , booking.class__studio__currency_code
  , booking.class__studio__phone_number
  , booking.class__studio__latitude
  , booking.class__studio__longitude
  , booking.class__coach__first_name
  , booking.ratings__coach__id
  , booking.ratings__coach__description
  , booking.ratings__coach__value
  , booking.ratings__class__id
  , booking.ratings__class__description
  , booking.ratings__class__value
  , booking._dlt_load_id
  , booking._dlt_id
from
  {{ source('orange_theory_delta', 'bookings') }} as booking

View File

@@ -0,0 +1,6 @@
-- Heart-rate parent records: exposes the dlt row id as dlt_record_id alongside the member and load ids.
select
  hr_profile._dlt_id as dlt_record_id
  , hr_profile.member_uuid
  , hr_profile._dlt_load_id
from
  {{ source('orange_theory_delta', 'heart_rate') }} as hr_profile

View File

@@ -0,0 +1,22 @@
-- Heart-rate history rows. The dlt row and parent ids are renamed to dlt_record_id / dlt_parent_id;
-- the remaining columns pass through unchanged.
select
  hr_history._dlt_id as dlt_record_id
  , hr_history._dlt_parent_id as dlt_parent_id
  , hr_history.max_hr__type
  , hr_history.max_hr__value
  , hr_history.zones__gray__start_bpm
  , hr_history.zones__gray__end_bpm
  , hr_history.zones__blue__start_bpm
  , hr_history.zones__blue__end_bpm
  , hr_history.zones__green__start_bpm
  , hr_history.zones__green__end_bpm
  , hr_history.zones__orange__start_bpm
  , hr_history.zones__orange__end_bpm
  , hr_history.zones__red__start_bpm
  , hr_history.zones__red__end_bpm
  , hr_history.change_from_previous
  , hr_history.change_bucket
  , hr_history.assigned_at
  , hr_history._dlt_root_id
  , hr_history._dlt_list_idx
from
  {{ source('orange_theory_delta', 'heart_rate__history') }} as hr_history

View File

@@ -0,0 +1,51 @@
-- Member profile rows. The four communication_preferences__* opt-in flags are shortened by dropping
-- that prefix; every other column passes through under its source name.
select
  member.id
  , member.member_uuid
  , member.email
  , member.phone_number
  , member.first_name
  , member.last_name
  , member.communication_preferences__email__marketing_opt_in as email__marketing_opt_in
  , member.communication_preferences__email__transactional_opt_in as email__transactional_opt_in
  , member.communication_preferences__sms__marketing_opt_in as sms__marketing_opt_in
  , member.communication_preferences__sms__transactional_opt_in as sms__transactional_opt_in
  , member.postal_code
  , member.date_of_birth
  , member.sex
  , member.locale
  , member.unit_of_measure
  , member.weight_unit
  , member.weight_value
  , member.height_unit
  , member.height_value
  , member.leaderboard_username
  , member.home_studio__id
  , member.home_studio__mbo_studio_id
  , member.home_studio__name
  , member.home_studio__status
  , member.home_studio__license_number
  , member.home_studio__country_code
  , member.home_studio__address__line1
  , member.home_studio__address__city
  , member.home_studio__address__state
  , member.home_studio__address__postal_code
  , member.home_studio__address__country
  , member.mbo_home_studio_id
  , member.mbo_client_id
  , member.mbo_unique_id
  , member.cognito_id
  , member.image_url
  , member.active_device__id
  , member.active_device__type
  , member.active_device__version
  , member.mailing_address__first_name
  , member.mailing_address__last_name
  , member.mailing_address__line1
  , member.mailing_address__city
  , member.mailing_address__state
  , member.mailing_address__country
  , member.mailing_address__postal_code
  , member._dlt_load_id
  , member._dlt_id
from
  {{ source('orange_theory_delta', 'me') }} as member

View File

@@ -0,0 +1,63 @@
-- Workout performance summaries, one selected row per source row.
-- The details__* / details__heart_rate__* / details__equipment_data__* prefixes are stripped and
-- each equipment metric's display_value / display_unit pair becomes <metric>_value / <metric>_unit.
-- NOTE(review): the dlt row id is exposed as record_id here, while heart_rate and telemetry expose
-- it as dlt_record_id — confirm whether the difference is intentional.
select
  perf.id as performance_summary_id
  , perf._bookings_id as booking_id
  , perf.details__calories_burned as calories_burned
  , perf.details__splat_points as splat_points
  , perf.details__step_count as step_count
  , perf.details__active_time_seconds as active_time_seconds
  , perf.details__zone_time_minutes__gray as zone_time_minutes__gray
  , perf.details__zone_time_minutes__blue as zone_time_minutes__blue
  , perf.details__zone_time_minutes__green as zone_time_minutes__green
  , perf.details__zone_time_minutes__orange as zone_time_minutes__orange
  , perf.details__zone_time_minutes__red as zone_time_minutes__red
  , perf.details__heart_rate__max_hr as max_hr
  , perf.details__heart_rate__peak_hr as peak_hr
  , perf.details__heart_rate__peak_hr_percent as peak_hr_percent
  , perf.details__heart_rate__avg_hr as avg_hr
  , perf.details__heart_rate__avg_hr_percent as avg_hr_percent
  , perf.details__equipment_data__treadmill__avg_speed__display_value as treadmill__avg_speed_value
  , perf.details__equipment_data__treadmill__avg_speed__display_unit as treadmill__avg_speed_unit
  , perf.details__equipment_data__treadmill__max_speed__display_value as treadmill__max_speed_value
  , perf.details__equipment_data__treadmill__max_speed__display_unit as treadmill__max_speed_unit
  , perf.details__equipment_data__treadmill__avg_incline__display_value as treadmill__avg_incline_value
  , perf.details__equipment_data__treadmill__avg_incline__display_unit as treadmill__avg_incline_unit
  , perf.details__equipment_data__treadmill__max_incline__display_value as treadmill__max_incline_value
  , perf.details__equipment_data__treadmill__max_incline__display_unit as treadmill__max_incline_unit
  , perf.details__equipment_data__treadmill__avg_pace__display_value as treadmill__avg_pace_value
  , perf.details__equipment_data__treadmill__avg_pace__display_unit as treadmill__avg_pace_unit
  , perf.details__equipment_data__treadmill__max_pace__display_value as treadmill__max_pace_value
  , perf.details__equipment_data__treadmill__max_pace__display_unit as treadmill__max_pace_unit
  , perf.details__equipment_data__treadmill__total_distance__display_value as treadmill__total_distance_value
  , perf.details__equipment_data__treadmill__total_distance__display_unit as treadmill__total_distance_unit
  , perf.details__equipment_data__treadmill__moving_time__display_value as treadmill__moving_time_value
  , perf.details__equipment_data__treadmill__moving_time__display_unit as treadmill__moving_time_unit
  , perf.details__equipment_data__treadmill__elevation_gained__display_value as treadmill__elevation_gained_value
  , perf.details__equipment_data__treadmill__elevation_gained__display_unit as treadmill__elevation_gained_unit
  , perf.details__equipment_data__rower__avg_power__display_value as rower__avg_power_value
  , perf.details__equipment_data__rower__avg_power__display_unit as rower__avg_power_unit
  , perf.details__equipment_data__rower__max_power__display_value as rower__max_power_value
  , perf.details__equipment_data__rower__max_power__display_unit as rower__max_power_unit
  , perf.details__equipment_data__rower__avg_speed__display_value as rower__avg_speed_value
  , perf.details__equipment_data__rower__avg_speed__display_unit as rower__avg_speed_unit
  , perf.details__equipment_data__rower__max_speed__display_value as rower__max_speed_value
  , perf.details__equipment_data__rower__max_speed__display_unit as rower__max_speed_unit
  , perf.details__equipment_data__rower__avg_pace__display_value as rower__avg_pace_value
  , perf.details__equipment_data__rower__avg_pace__display_unit as rower__avg_pace_unit
  , perf.details__equipment_data__rower__max_pace__display_value as rower__max_pace_value
  , perf.details__equipment_data__rower__max_pace__display_unit as rower__max_pace_unit
  , perf.details__equipment_data__rower__avg_cadence__display_value as rower__avg_cadence_value
  , perf.details__equipment_data__rower__avg_cadence__display_unit as rower__avg_cadence_unit
  , perf.details__equipment_data__rower__max_cadence__display_value as rower__max_cadence_value
  , perf.details__equipment_data__rower__max_cadence__display_unit as rower__max_cadence_unit
  , perf.details__equipment_data__rower__total_distance__display_value as rower__total_distance_value
  , perf.details__equipment_data__rower__total_distance__display_unit as rower__total_distance_unit
  , perf.details__equipment_data__rower__moving_time__display_value as rower__moving_time_value
  , perf.details__equipment_data__rower__moving_time__display_unit as rower__moving_time_unit
  , perf.ratable
  , perf.class__starts_at_local
  , perf.class__name
  , perf.class__type
  , perf._dlt_load_id
  , perf._dlt_id as record_id
from
  {{ source('orange_theory_delta', 'performance_summaries') }} as perf

View File

@@ -0,0 +1,21 @@
-- Telemetry parent records. The dlt row id becomes dlt_record_id and _bookings_id becomes booking_id;
-- the remaining columns pass through unchanged.
select
  tel._dlt_id as dlt_record_id
  , tel._bookings_id as booking_id
  , tel.class_history_uuid
  , tel.class_start_time
  , tel.member_uuid
  , tel.max_hr
  , tel.zones__gray__start_bpm
  , tel.zones__gray__end_bpm
  , tel.zones__blue__start_bpm
  , tel.zones__blue__end_bpm
  , tel.zones__green__start_bpm
  , tel.zones__green__end_bpm
  , tel.zones__orange__start_bpm
  , tel.zones__orange__end_bpm
  , tel.zones__red__start_bpm
  , tel.zones__red__end_bpm
  , tel.window_size
  , tel._dlt_load_id
from
  {{ source('orange_theory_delta', 'telemetry') }} as tel

View File

@@ -0,0 +1,18 @@
-- Telemetry child rows. The dlt row and parent ids are renamed to dlt_record_id / dlt_parent_id;
-- the remaining columns pass through unchanged.
select
  reading._dlt_id as dlt_record_id
  , reading._dlt_parent_id as dlt_parent_id
  , reading.relative_timestamp
  , reading.hr
  , reading.agg_splats
  , reading.agg_calories
  , reading.row_data__row_speed
  , reading.row_data__row_pps
  , reading.row_data__row_spm
  , reading.row_data__agg_row_distance
  , reading.row_data__row_pace
  , reading.tread_data__tread_speed
  , reading.tread_data__tread_incline
  , reading.tread_data__agg_tread_distance
  , reading._dlt_list_idx
from
  {{ source('orange_theory_delta', 'telemetry__telemetry') }} as reading

View File

@@ -0,0 +1,32 @@
# Source declarations for files read directly from disk.
# NOTE(review): the {prefix} and {name} placeholders are presumably filled in by the dbt-duckdb
# adapter from the source meta and the table name — confirm against the adapter version in use.
version: 2

sources:
  # Delta tables, read through DuckDB's delta_scan().
  - name: orange_theory_delta
    meta:
      external_location: "delta_scan('{prefix}/{name}/')"
      prefix: &data_root "otf_api_data"
    tables:
      - name: telemetry
      - name: body_composition
      - name: bookings
      - name: heart_rate
      - name: heart_rate__history
      - name: me
      - name: performance_summaries
      - name: telemetry__telemetry
      - name: challenges
      - name: challenges__benchmarks__years
      - name: challenges__benchmarks
      - name: challenges__challenges__years
      - name: challenges__challenges
      - name: challenges__programs__years
      - name: challenges__programs

  # dlt bookkeeping tables, stored as JSON-lines files under the same prefix.
  - name: dlt_metadata
    meta:
      external_location: "'{prefix}/{name}/*.jsonl'"
      prefix: *data_root
    tables:
      - name: _dlt_loads
      - name: _dlt_pipeline_state
      - name: _dlt_version

8
duck_dbt/profiles.yml Normal file
View File

@@ -0,0 +1,8 @@
# Connection profile referenced by `profile:` in dbt_project.yml.
dbt_orange_theory:
  # Output used when no --target is passed.
  target: dev
  outputs:
    dev:
      type: duckdb
      # DuckDB database file, given as a relative path.
      path: otf_api_data/duck.db
      plugins:
        - module: delta

0
duck_dbt/tests/.gitkeep Normal file
View File

View File

@@ -21,7 +21,7 @@ def load_orange_theory(args=args) -> None:
def run_dbt(args=args) -> None:
dbt = dbtRunner()
cli_args = ["run", "--project-dir", "dbt", "--profiles-dir", "dbt"]
cli_args = ["run", "--project-dir", "duck_dbt", "--profiles-dir", "duck_dbt"]
if args.full_refresh:
cli_args.append("--full-refresh")