Skip to main content

Drug Accuracy Scores

work-in-progress

Eventually, the goal is to use a provision-informed methodology to select the best drug rate. We would infer high cost OP % of charge provisions and use this to select the best rate.

In the meantime, we need to have a methodology that will offer a way to distinguish scores 2-4 (5 are validated, 1 are outliers).

Scoring Criteria

  • Score 5: Validated
  • Score 4: Between 1.25 and 3.5 (10th–90th percentiles)
  • Score 3: Between 0.9 and 4
  • Score 2: < 0.9 or > 4
  • Outlier:
    • Hospital MRF: < 0.8 or > 4
    • Payer MRF: < 0.8 or > 10

Distributions

Validated

hospitalpayer
count70922908
mean2.247312.13091
std0.8110920.786044
min1.21.2
1%1.211.21
5%1.241.23
10%1.281.26
20%1.431.374
30%1.631.55
40%1.831.758
50%2.091.97
60%2.372.14
70%2.72.42
80%3.052.82
90%3.523.43
95%3.763.77
99%3.963.96
max44
Code
# %%
df = pd.read_sql(f"""
SELECT canonical_rate_source, canonical_rate_percent_of_medicare
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE
is_drug_code = True
AND provider_type = 'Hospital'
AND canonical_rate_score = 5
AND canonical_rate_class != 'Impute'
ORDER BY RANDOM()
LIMIT 10000
""", con=trino_conn)

# %%
print(
df
.groupby('canonical_rate_source')['canonical_rate_percent_of_medicare']
.describe(
percentiles=[
0.01,
0.05,
0.1,
0.20,
0.30,
0.40,
0.50,
0.60,
0.70,
0.80,
0.90,
0.95,
0.99,
]
)
.T
.to_markdown()
)

All Non-Outlier Rates

hospitalpayer
count22537747
mean2.037341.79309
std0.9107451.57546
min0.80.8
1%0.830.83
5%0.920.9
10%10.94
20%1.150.98
30%1.351
40%1.591.04
50%1.831.11
60%2.141.25
70%2.51.57
80%2.972.12
90%3.473.55
95%3.755.44
99%3.968.6908
max410
Code
df = pd.read_sql(f"""
SELECT canonical_rate_source, canonical_rate_percent_of_medicare
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE
is_drug_code = True
AND provider_type = 'Hospital'
AND canonical_rate_score > 1
AND canonical_rate_class != 'Impute'
ORDER BY RANDOM()
LIMIT 10000
""", con=trino_conn)
df

print(
df
.groupby('canonical_rate_source')['canonical_rate_percent_of_medicare']
.describe(
percentiles=[
0.01,
0.05,
0.1,
0.20,
0.30,
0.40,
0.50,
0.60,
0.70,
0.80,
0.90,
0.95,
0.99,
]
)
.T
.to_markdown()
)

Consistent Rates

pct_of_medicare
count6852
mean1.53906
std0.886572
min0.8
1%0.86
5%0.94
10%0.98
25%1
50%1.43
75%1.63
90%2.05
95%2.89
99%5.63
max9.37
Code

%%

df = pd.read_sql(f""" WITH df AS ( SELECT provider_id, network_id, APPROX_PERCENTILE(i.canonical_rate_percent_of_medicare, 0.05) as p05, APPROX_PERCENTILE(i.canonical_rate_percent_of_medicare, 0.25) as p25, APPROX_PERCENTILE(i.canonical_rate_percent_of_medicare, 0.5) as p50, APPROX_PERCENTILE(i.canonical_rate_percent_of_medicare, 0.75) as p75, APPROX_PERCENTILE(i.canonical_rate_percent_of_medicare, 0.95) as p95, COUNT(*) as n_rates FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged i WHERE i.provider_type = 'Hospital' AND i.is_drug_code = True AND i.canonical_rate_class != 'Impute' AND i.canonical_rate_score > 1 AND i.canonical_rate_source = 'hospital' GROUP BY 1,2 ), consistent AS ( SELECT * FROM df WHERE p95 - p05 < 0.25 AND n_rates >= 30 ) SELECT i.canonical_rate_percent_of_medicare as pct_of_medicare FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged i JOIN consistent USING (provider_id, network_id) WHERE i.canonical_rate_class != 'Impute' AND i.is_drug_code = True AND i.provider_type = 'Hospital' AND i.canonical_rate_score > 1 AND i.canonical_rate_source = 'hospital' ORDER BY RANDOM() LIMIT 50000 """, con=trino_conn)

%%

print( df['pct_of_medicare'] .describe(percentiles=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]) .to_markdown() )