Quickstart
Get a basic ConversionOS propensity scoring pipeline running in BigQuery in under an hour.
Prerequisites
- Google Cloud project with BigQuery enabled
- Dataset with at least 90 days of customer event data
- A target conversion event defined (purchase, signup, deposit, etc.)
Step 1: Create the Feature Table
CREATE OR REPLACE TABLE `your_project.conversionos.feature_table` AS
-- One row per user: 90-day behavioral features plus a binary conversion label.
-- NOTE: in this quickstart the features and the label are computed over the
-- SAME 90-day window, so the model partially "sees the future" (label leakage).
-- For production, build features from a window that ENDS BEFORE the
-- conversion-observation window starts.
WITH user_events AS (
  -- Per-user aggregates; every non-key column must be aggregated because we
  -- GROUP BY user_id (raw event_name/event_date/event_timestamp cannot appear here).
  SELECT
    user_id,
    -- Behavioral features
    COUNTIF(event_name = 'page_view') AS total_pageviews,
    COUNTIF(event_name = 'page_view'
      AND event_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY)) AS pageviews_7d,
    COUNTIF(event_name = 'page_view'
      AND event_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)) AS pageviews_30d,
    -- Engagement features
    COUNT(DISTINCT event_date) AS active_days,
    DATE_DIFF(CURRENT_DATE(), MAX(event_date), DAY) AS days_since_last_activity,
    -- Session features
    COUNT(DISTINCT session_id) AS total_sessions,
    AVG(session_duration_seconds) AS avg_session_duration
  FROM `your_project.analytics.events`
  WHERE event_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)
  GROUP BY user_id
),
conversions AS (
  -- Users who converted at least once in the window; used as the positive class.
  SELECT
    user_id,
    1 AS converted
  FROM `your_project.analytics.events`
  WHERE event_name = 'purchase' -- Replace with your conversion event
    AND event_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)
  GROUP BY user_id
)
SELECT
  e.*,
  -- Ratio of recent (7d) to monthly (30d) pageviews; SAFE_DIVIDE returns NULL
  -- instead of erroring when pageviews_30d is 0.
  SAFE_DIVIDE(e.pageviews_7d, e.pageviews_30d) AS engagement_recency_ratio,
  SAFE_DIVIDE(e.total_sessions, e.active_days) AS sessions_per_active_day,
  -- Non-converters are absent from `conversions`, so NULL becomes the 0 class.
  COALESCE(c.converted, 0) AS label
FROM user_events e
LEFT JOIN conversions c USING (user_id);
Step 2: Train the XGBoost Model
-- Train a gradient-boosted tree classifier (XGBoost under the hood) on the
-- feature table. Note: user_id is deliberately excluded from the feature list.
CREATE OR REPLACE MODEL `your_project.conversionos.propensity_model`
OPTIONS(
model_type='BOOSTED_TREE_CLASSIFIER',
input_label_cols=['label'],          -- binary target built in Step 1
num_parallel_tree=5,                 -- trees per boosting iteration (boosted random forest)
max_tree_depth=6,                    -- limits tree complexity / overfitting
learn_rate=0.1,
l1_reg=1.0,                          -- L1 regularization on leaf weights
l2_reg=1.0,                          -- L2 regularization on leaf weights
early_stop=TRUE,                     -- stop when the eval metric stops improving
min_split_loss=0.1,                  -- minimum loss reduction to split (a.k.a. gamma)
data_split_method='RANDOM',
data_split_eval_fraction=0.2,        -- holds out 20% for evaluation / early stopping
enable_global_explain=TRUE           -- enables ML.GLOBAL_EXPLAIN feature attributions
) AS
SELECT
total_pageviews,
pageviews_7d,
pageviews_30d,
active_days,
days_since_last_activity,
total_sessions,
avg_session_duration,
engagement_recency_ratio,
sessions_per_active_day,
label
FROM `your_project.conversionos.feature_table`;
Step 3: Score Prospects
-- Score every user and bucket them into actionable tiers.
-- ML.PREDICT returns predicted_label_probs as an ARRAY<STRUCT<label, prob>>;
-- we UNNEST it and keep only the positive-class (label = 1) probability.
CREATE OR REPLACE TABLE `your_project.conversionos.scored_prospects` AS
SELECT
  user_id,
  predicted_label,
  ROUND(prob_struct.prob, 4) AS propensity_score,
  -- Tier thresholds: tune these cutoffs to your score distribution.
  CASE
    WHEN prob_struct.prob >= 0.7 THEN 'HIGH'
    WHEN prob_struct.prob >= 0.4 THEN 'MEDIUM'
    WHEN prob_struct.prob >= 0.15 THEN 'LOW'
    ELSE 'EXCLUDE'
  END AS propensity_tier,
  -- Relative conversion values fed to ad platforms for value-based bidding.
  CASE
    WHEN prob_struct.prob >= 0.7 THEN 240
    WHEN prob_struct.prob >= 0.4 THEN 50
    WHEN prob_struct.prob >= 0.15 THEN 10
    ELSE 1
  END AS conversion_value
FROM ML.PREDICT(
  MODEL `your_project.conversionos.propensity_model`,
  (SELECT * FROM `your_project.conversionos.feature_table`)
), UNNEST(predicted_label_probs) AS prob_struct
WHERE prob_struct.label = 1;
Step 4: Evaluate Model Performance
-- Evaluate on the 20% holdout reserved by data_split_eval_fraction during
-- training. Omitting the input-table argument makes ML.EVALUATE use that
-- held-out split; passing the full feature table would re-score training
-- rows and inflate the metrics.
SELECT * FROM ML.EVALUATE(
  MODEL `your_project.conversionos.propensity_model`
);
Key metrics to monitor:
- AUC-ROC: Target > 0.80 for production deployment
- Log loss: Lower is better, compare against baseline
- Precision/Recall at threshold: Tune the score threshold based on your business's tolerance for false positives versus missed conversions
Step 5: Export to CDP
Push scored prospects to your CDP via:
- BigQuery → ActionIQ: Native connector, schedule daily sync
- BigQuery → Segment: Use Reverse ETL (Census, Hightouch)
- BigQuery → Custom: Export via scheduled query to GCS, pick up via API
# Example: Export scores to GCS for CDP ingestion
from google.cloud import bigquery

# Extract the scored_prospects table to sharded CSV files in GCS (CSV is the
# default extract format); the CDP picks the files up from the bucket.
bq_client = bigquery.Client()
gcs_pattern = "gs://your-bucket/conversionos/scores/daily/*.csv"

source_table = bigquery.DatasetReference("your_project", "conversionos").table(
    "scored_prospects"
)
job = bq_client.extract_table(source_table, gcs_pattern, location="US")
job.result()  # block until the export job finishes
Next Steps
- Propensity Scoring Module — Deep dive into feature engineering
- CDP Audience Taxonomy — Build 200+ segments from scores
- Google Ads Engine — Feed conversion values to ad platforms