Skip to main content

PG Accuracy Scores

We would like to create procedure group (PG)-specific accuracy scoring methodology.

Similar to labs, we define a "consistent percentage of medicare rate" as having at least 30 rates for a given PG/network combination AND having the difference between % of Medicare rates' 5th and 95th percentiles be less than 0.25.

------------------------------------
-- PG HIERARCHY:
-- SCORE = 5: has_consistent_pct_of_medicare_rate AND percent between 0.9 and 3.5 (10th and 90th percentiles of consistent rates)
-- SCORE = 4: has_consistent_pct_of_medicare_rate AND percent between 0.5 and 5.5 (1st and 99th percentiles of consistent rates)
-- SCORE = 3: percent between 0.9 and 3.5
-- SCORE = 2: not an outlier 0.5 and 5.5 (captures ~98% of posted PG rates)
------------------------------------

Distributions

Validated Rates

canonical_rate_percent_of_medicare
count49996
mean2.26526
std1.78897
min0.12
1%0.71
5%1
10%1.11
25%1.34
50%1.78
75%2.6
90%3.82
95%4.7
99%8.44
max29.96
df = pd.read_sql(f"""
SELECT
canonical_rate_percent_of_medicare
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE bill_type = 'Professional'
AND canonical_rate_score = 5
AND provider_type = 'Physician Group'
ORDER BY RANDOM()
LIMIT 50000
""", con=trino_conn)

print(df['canonical_rate_percent_of_medicare'].describe())

All Non-Outlier Rates

canonical_rate_percent_of_medicare
count49998
mean1.86677
std1.93384
min0.1
1%0.35
5%0.42
10%0.6
25%0.92
50%1.37
75%2.22
90%3.42
95%4.5315
99%8.87
max29.71
df = pd.read_sql(f"""
SELECT
canonical_rate_percent_of_medicare
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE bill_type = 'Professional'
AND canonical_rate_score > 1
AND provider_type = 'Physician Group'
ORDER BY RANDOM()
LIMIT 50000
""", con=trino_conn)

print(
df['canonical_rate_percent_of_medicare']
.describe(percentiles=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99])
.to_markdown()
)

Consistent Rates

canonical_rate_percent_of_medicare
count6394
mean1.67522
std1.44894
min0
1%0.6793
5%0.8
10%0.93
25%1.04
50%1.23
75%1.65
90%3.48
95%4.2435
99%5.5
max49.75
df = pd.read_sql(f"""
WITH
df AS (
SELECT
provider_id,
network_id,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.05) as p05,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.25) as p25,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.5) as p50,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.75) as p75,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.95) as p95,
COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE bill_type = 'Professional'
AND canonical_rate_score > 1
AND provider_type = 'Physician Group'
GROUP BY 1,2
),
consistent AS (
SELECT
*
FROM df
WHERE
p95 - p05 < 0.25
AND n_rates >= 30
)
SELECT canonical_rate_percent_of_medicare
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
JOIN consistent USING (provider_id, network_id)
ORDER BY RANDOM()
LIMIT 50000
""", con=trino_conn)

print(
df['canonical_rate_percent_of_medicare']
.describe(percentiles=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99])
.to_markdown()
)