Advanced Validation
A validation with a comprehensive set of rules.
import pointblank as pb
import polars as pl
import narwhals as nw
# Build and run a multi-step validation plan against the bundled
# "game_revenue" dataset (loaded as a Polars table), then display the result.
# Each chained call below adds one or more validation steps; nothing is
# evaluated until `.interrogate()` is called at the end of the chain.
validation = (
    pb.Validate(
        data=pb.load_dataset(dataset="game_revenue", tbl_type="polars"),
        tbl_name="game_revenue",
        label="Comprehensive validation example",
        # Failure-fraction thresholds applied to every step in the plan.
        # NOTE(review): newer pointblank releases appear to name these
        # `warning` / `error` / `critical` -- confirm against the installed version.
        thresholds=pb.Thresholds(warn_at=0.10, stop_at=0.25, notify_at=0.35),
    )
    .col_vals_regex(columns="player_id", pattern=r"^[A-Z]{12}[0-9]{3}$")  # STEP 1
    .col_vals_gt(columns="session_duration", value=5)  # STEP 2
    .col_vals_ge(columns="item_revenue", value=0.02)  # STEP 3
    .col_vals_in_set(columns="item_type", set=["iap", "ad"])  # STEP 4
    .col_vals_in_set(  # STEP 5
        columns="acquisition",
        set=["google", "facebook", "organic", "crosspromo", "other_campaign"],
    )
    .col_vals_not_in_set(columns="country", set=["Mongolia", "Germany"])  # STEP 6
    .col_vals_between(  # STEP 7
        columns="session_duration",
        left=10,
        right=50,
        # Reduce the table to the median of `session_duration` first, so the
        # range check is applied to that aggregate rather than to each row.
        pre=lambda df: df.select(pl.median("session_duration")),
    )
    .rows_distinct(columns_subset=["player_id", "session_id", "time"])  # STEP 8
    .row_count_match(count=2000)  # STEP 9
    .col_count_match(count=11)  # STEP 10
    # The column selector presumably expands to one step per matching column,
    # which is why this single call is numbered as three steps.
    .col_vals_not_null(columns=pb.starts_with("item"))  # STEPS 11-13
    .col_exists(columns="start_day")  # STEP 14
    .interrogate()
)

# Bare expression: renders the validation report when run in a notebook/REPL.
validation
Preview of Input Table
Polars | Rows: 2000 | Columns: 11 |
|||||||||||