This vignette is a mirror of a small book prepared internally by Grattan Institute. The goal is to demonstrate how to perform simple analysis and create common charts. You will need the taxstats package, available via `devtools::install_github('hughparsonage/taxstats')`.
# Session setup for the vignette.
# A large scipen penalises scientific notation so that numbers print in fixed notation.
options("scipen" = 99)
library(knitr)
# Default figure size for every chunk.
opts_chunk$set(fig.width = 9, fig.height = 6.5)
# Financial year analysed throughout the vignette.
FY.YEAR <- "2013-14"
# Weighted sum: multiply `x` element-wise by the weights `w` and add up the result.
# With the default `w = 1` this is a plain sum. Logical `x` counts as 0/1, so
# wsum(condition, WEIGHT) is the total weight of the rows meeting the condition.
wsum <- function(x, w = 1){
  weighted <- x * w
  sum(weighted)
}
library(data.table)
# Make sure taxstats is installed (from the author's drat repository when it is
# missing), then attach it and build `sample_files_all`.
if (!requireNamespace("taxstats", quietly = TRUE)) {
  install.packages("taxstats", repos = "https://hughparsonage.github.io/drat/", type = "source")
}
library(taxstats)
sample_files_all <- get_sample_files_all()
library(grattan)
library(dtplyr)
library(dplyr)
library(ggplot2)
library(scales)
library(magrittr)
library(ggrepel)
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
##
## viridis_pal
# Everything below reads `sample_files_all`; fail early with an actionable
# message rather than an opaque "object not found" error further down.
if (!exists("sample_files_all")) {
  stop("`sample_files_all` was not found. Install and attach the taxstats package, ",
       "then run `sample_files_all <- get_sample_files_all()`.",
       call. = FALSE)
}
# Format amounts as dollar strings with thousands separators, e.g. "$1,234".
# Negative amounts are prefixed with a true minus sign (U+2212), not a hyphen.
#   x       numeric vector of amounts
#   digits  passed to format() as `nsmall` (minimum number of decimal places)
# Returns a character vector the same length as `x`.
grattan_dollar <- function (x, digits = 0) {
  magnitude <- format(abs(x),
                      digits = 1L,
                      nsmall = digits,
                      big.mark = ",",
                      trim = TRUE,
                      scientific = FALSE)
  negative_form <- paste0("\U2212","$", magnitude)
  positive_form <- paste0("$", magnitude)
  if_else(x < 0, negative_form, positive_form)
}
# Sample file for the financial year under analysis, joined to age_range_decoder
# on age_range (later code relies on the age_range_description column).
sample_file <- sample_files_all %>% filter(fy.year == FY.YEAR)
sample_file <- merge(sample_file, age_range_decoder, by = "age_range")
# The same construction for the preceding financial year, used for year-on-year comparisons.
PREV.FY.YEAR <- yr2fy(fy2yr(FY.YEAR) - 1)
sample_file_prev <- sample_files_all[fy.year == PREV.FY.YEAR]
sample_file_prev <- merge(sample_file_prev, age_range_decoder, by = "age_range")
# Impute a continuous age (age_imp) for every row from its age band.
# The seed fixes the random draws made by runif() below.
set.seed(48031)
sample_file %<>%
group_by(age_range_description) %>%
# Lower bound of the band: the two leading digits when the description contains "to";
# otherwise 70 when it mentions "70", and 15 for anything else.
mutate(min_age = ifelse(grepl("to", age_range_description),
as.numeric(gsub("^([0-9]{2}).*$", "\\1", age_range_description)),
ifelse(grepl("70", age_range_description),
70,
15)),
# Every band is treated as five years wide; the age is drawn uniformly within it.
max_age = min_age + 5,
age_imp = runif(n(), min_age, max_age)) %>%
# The helper bounds are dropped once age_imp has been drawn.
select(-min_age, -max_age)
# Assign every row to a tax bracket using the thresholds below.
# cut() intervals are right-closed, so a taxable income of exactly 18200 falls in
# the first bracket. The third label previously lacked its leading "$".
sample_file %<>%
  mutate(Tax_Bracket = cut(Taxable_Income,
                           breaks = c(-Inf, 18200, 37e3, 80e3, 180e3, Inf),
                           include.lowest = TRUE,
                           labels = c("$0-$18,200",
                                      "$18,201-$37,000",
                                      "$37,001-$80,000",
                                      "$80,001-$180,000",
                                      "$180,000+")))
# Format a single number for LaTeX prose, e.g. 12900000 -> "12.9~million".
#
# Arguments:
#   number      A length-one numeric.
#   sig.figs    Number of significant figures to display.
#   dollar      If TRUE, prefix the result with an escaped dollar sign ("\\$").
#   pre.phrase  NULL, or a character vector of length two. Element 1 is prepended
#               when the displayed value is larger than `number`, element 2 when
#               it is smaller; nothing is prepended when rounding left the value
#               unchanged. A phrase ending in "~" is joined without a space.
#   .suffix     NULL to pick the scale automatically, or one of "million",
#               "billion", "trillion" to force it.
#
# Returns a character string; negative numbers are prefixed with "\\(-\\)".
# Zero is returned as the numeric 0, with a warning.
texNum <- function(number, sig.figs = 3L, dollar = FALSE, pre.phrase = NULL, .suffix = NULL){
  orig.number <- number
  stopifnot(is.numeric(number), length(number) == 1L)
  is.negative <- number < 0
  number <- abs(number)
  if (number == 0){
    warning("Returning 0")
    return(0)
  }

  suffix <- NULL
  suffix_val <- 1
  if (is.null(.suffix)){
    n.digits <- ceiling(log10(number))
    if (n.digits < sig.figs){
      # Small numbers: no separators or suffix needed. prefix_val must be set
      # here too, otherwise the pre.phrase comparison below fails.
      prefix_val <- signif(x = number, digits = sig.figs)
      prefix <- prefix_val
    } else if (n.digits <= 6) {
      # Keep exactly sig.figs leading digits (previously one digit too few).
      prefix_val <- round(number, sig.figs - n.digits)
      prefix <- prettyNum(prefix_val, big.mark = ",", scientific = FALSE)
    } else {
      # Show number / 10^(multiple of 3) followed by the matching suffix.
      # Anything beyond trillions is still expressed in trillions.
      # NOTE(review): 9- and 12-digit numbers take the next suffix up
      # (e.g. 5e8 -> "0.5~billion"); kept as in the original.
      thousands_group <- min(n.digits %/% 3, 4)
      suffix_val <- 10 ^ (3 * thousands_group)
      suffix <- c("~million", "~billion", "~trillion")[thousands_group - 1]
      prefix_val <- signif(number / suffix_val, digits = sig.figs)
      prefix <- prefix_val
    }
  } else {
    stopifnot(.suffix %in% c("million", "billion", "trillion"))
    suffix_val <- c(million = 10^6, billion = 10^9, trillion = 10^12)[[.suffix]]
    suffix <- paste0("~", .suffix)
    prefix <- signif(number / suffix_val, digits = sig.figs)
    prefix_val <- prefix
  }

  if (dollar){
    out <- paste0("\\$", prefix, suffix)
  } else {
    out <- paste0(prefix, suffix)
  }

  if (is.negative){
    # General LaTeX
    out <- paste0("\\(-\\)", out)
  }

  # Is the displayed number larger or smaller than the original?
  if (!is.null(pre.phrase)){
    # Compare signed values so that negative inputs are handled correctly.
    displayed_val <- prefix_val * suffix_val
    if (is.negative) {
      displayed_val <- -displayed_val
    }
    # Test for (near-)equality first so floating-point noise never adds a phrase.
    unchanged <- isTRUE(all.equal(displayed_val,
                                  orig.number,
                                  tolerance = .Machine$double.eps))
    if (!unchanged) {
      phrase <- if (displayed_val > orig.number) pre.phrase[1] else pre.phrase[2]
      out <- paste(phrase, out, sep = if (grepl("~$", phrase)) "" else " ")
    }
  }
  out
}
There were 12.9~million taxpayers in 2013-14 in Australia. Of those, 180,000 had zero taxable income (or a taxable loss). (These ‘taxpayers’ naturally paid no tax, nor did the 2.5~million individuals below the tax-free threshold. For this vignette, a taxpayer is anyone who lodged a tax return, regardless of their tax liability.)
# Quantile(s) of taxable income in the global `sample_file`.
#   q  numeric vector of probabilities in [0, 1]
# Returns the (named) vector produced by quantile().
tx_inc_q <- function(q){
  incomes <- sample_file$Taxable_Income
  quantile(incomes, probs = q)
}
# Axis labels: the dollar value of each decile boundary (0%, 10%, ..., 100%) of taxable income.
my_labs <- grattan_dollar(tx_inc_q((0:10)/10))
# Prefix every second label with a newline so neighbouring labels sit on alternate rows.
my_labs[seq(2, 10, 2)] <- paste0("\n", my_labs[seq(2, 10, 2)])
# Kernel density of taxable income below its 95th percentile, kept as an x/y data frame.
dens <- density(sample_file[Taxable_Income < tx_inc_q(0.95)]$Taxable_Income)
DF <- with(dens, data.frame(x, y))
# Density of taxable income between 0 and the 95th percentile, with the x axis
# labelled at the decile boundaries.
sample_file %>%
# NOTE(review): Taxable_Income_decile is computed here but not referenced again in this chunk.
mutate(Taxable_Income_decile = ntile(Taxable_Income, 10)) %>%
filter(between(Taxable_Income, 0, tx_inc_q(0.95))) %>%
ggplot(aes(x = Taxable_Income)) +
geom_density() +
# NOTE(review): no fill aesthetic is mapped in this plot, so this scale appears to have no effect.
scale_fill_viridis(discrete = TRUE) +
# Breaks at every decile boundary plus the 95th percentile, labelled in dollars.
scale_x_continuous("Taxable Income deciles",
labels = c(my_labs, grattan_dollar(tx_inc_q(0.95))),
# limits = c(0, tx_inc_q(0.95)),
breaks = c(tx_inc_q((0:10)/10), tx_inc_q(0.95))) +
scale_y_continuous(expand = c(0,0)) +
# The density scale itself is not of interest, so the y axis is hidden.
theme(legend.position = "none",
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank())
# The same density drawn from the precomputed DF, with the area under the curve
# coloured by taxable-income decile and an annotation pointing at the 95th percentile.
DF %>%
# Bin each density x value into the decile of taxable income it falls in.
mutate(Taxable_Income_decile = cut(x,
breaks = quantile(sample_file$Taxable_Income,
probs = c(0:10)/10),
right = TRUE,
include.lowest = TRUE)) %>%
# Keep a 5% margin to the right of the 95th percentile.
filter(between(x, -1, tx_inc_q(0.95) * 1.05)) %>%
{
# Braces are used so that `.` (the filtered data) can be referenced inside annotate() below.
ggplot(., aes(x = x, y = y)) +
# Black outline layer drawn underneath the coloured areas.
geom_area(color = "black", size = 1.45) +
geom_area(aes(x = x, y = y,
group = Taxable_Income_decile,
fill = factor(Taxable_Income_decile),
color = factor(Taxable_Income_decile))) +
scale_color_viridis(discrete = TRUE) +
scale_fill_viridis(discrete = TRUE) +
scale_x_continuous("Taxable Income deciles",
labels = c(my_labs, grattan_dollar(tx_inc_q(0.95))),
expand = c(0,0),
# limits = c(-1, tx_inc_q(0.95)*1.05),
breaks = c(tx_inc_q((0:10)/10), tx_inc_q(0.95))) +
scale_y_continuous(expand = c(0,0), limits = c(0, max(.$y) * 1.05)) +
theme(legend.position = "none",
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank())+
# Text label placed at the 92.5th percentile, at twice the height of the curve to its right.
annotate("text",
x = tx_inc_q(0.925),
y = 2 * max(.$y[.$x > tx_inc_q(0.925)]),
size = 10/(14/5),
label = paste0("5% of taxpayers\nhad incomes\ngreater than\n", grattan_dollar(tx_inc_q(0.95))),
hjust = 0,
vjust = 0) +
# Horizontal arrow from the label towards the 95th percentile, just below the text.
annotate("segment",
arrow = arrow(type = "closed", length = unit(11, "pt"), angle = 20),
x = tx_inc_q(0.925),
y = 1.9 * max(.$y[.$x > tx_inc_q(0.925)]),
size = 1,
xend = tx_inc_q(0.95),
yend = 1.9 * max(.$y[.$x > tx_inc_q(0.925)]))
}
# Weighted number of individuals with a positive total current-year capital gain.
n_CGs <-
sample_file %>%
filter(Tot_CY_CG_amt > 0) %$%
sum(WEIGHT)
# The same count for the previous financial year.
n_CGs_prev <-
sample_file_prev %>%
filter(Tot_CY_CG_amt > 0) %$%
sum(WEIGHT)
# Weighted total of current-year capital gains.
# NOTE(review): as.numeric() wraps the product, so it guards sum() against integer
# overflow but not the element-wise multiplication itself — confirm that is sufficient.
tot_CG_amt <-
sample_file %$%
sum(as.numeric(Tot_CY_CG_amt * WEIGHT))
# Weighted total of net capital gains.
tot_Net_CG_amt <-
sample_file %$%
sum(as.numeric(Net_CG_amt * WEIGHT))
# Tax attributable to net capital gains: income tax on taxable income minus income tax
# on taxable income with the net capital gain removed (pmaxC(., 0) presumably floors at zero).
# `total` is weighted by WEIGHT; `avg` is the unweighted mean over sample rows.
tax_on_CG <-
sample_file %>%
filter(Net_CG_amt > 0) %>%
mutate(tax = income_tax(Taxable_Income, fy.year = FY.YEAR),
tax_wo_CG = income_tax(pmaxC(Taxable_Income - Net_CG_amt, 0), fy.year = FY.YEAR)) %>%
summarise(total = sum((tax - tax_wo_CG) * WEIGHT),
avg = mean(tax - tax_wo_CG))
# Tax attributable to net capital gains in the previous financial year: income
# tax on taxable income minus income tax with the net capital gain removed.
# The previous-year sample is taxed under the previous year's rules
# (PREV.FY.YEAR); it previously used FY.YEAR.
# `total` is weighted by WEIGHT; `avg` is the unweighted mean over sample rows.
tax_on_CG_prev <-
  sample_file_prev %>%
  filter(Net_CG_amt > 0) %>%
  mutate(tax = income_tax(Taxable_Income, fy.year = PREV.FY.YEAR),
         tax_wo_CG = income_tax(pmaxC(Taxable_Income - Net_CG_amt, 0), fy.year = PREV.FY.YEAR)) %>%
  summarise(total = sum((tax - tax_wo_CG) * WEIGHT),
            avg = mean(tax - tax_wo_CG))
# Format proportions with percent() and escape the percent sign for LaTeX ("%" -> "\%").
latex_percent <- function(x) {
  formatted <- percent(x)
  gsub("%", "\\%", formatted, fixed = TRUE)
}
The capital gains discount applies to assets sold after more than 12 months’ holding. There were 840,000 individuals who sold capital assets, up 8.84% from last year. The sale of their assets totalled $34.2~billion of which $12.9~billion comprised part of their taxable income.
The tax on these capital gains totalled $4.76~billion or $7,900 per individual with capital gains tax.
# Share of sample rows with a positive net capital gain, by age band (unweighted).
probCG_by_age <-
sample_file %>%
group_by(age_range_description) %>%
summarise(probCG = mean(Net_CG_amt > 0))
# The same share among rows with an imputed age under 30.
probCG_twenties <-
sample_file %>%
filter(age_imp < 30) %$%
mean(Net_CG_amt > 0)
# ... and among rows with an imputed age of 65 or more.
probCG_65p <-
sample_file %>%
filter(age_imp >= 65) %$%
mean(Net_CG_amt > 0)
# Marginal rate is measured as the extra income tax on one more dollar of taxable income.
# Unweighted mean marginal rate among rows with a positive net capital gain.
avg_marginal_rate_CG <-
sample_file %>%
filter(Net_CG_amt > 0) %>%
mutate(marginal_rate = income_tax(Taxable_Income + 1, fy.year = FY.YEAR) - income_tax(Taxable_Income, fy.year = FY.YEAR)) %$%
mean(marginal_rate)
# The same mean, weighting each row by the size of its net capital gain.
avg_marginal_rate_CG_weighted_by_CG <-
sample_file %>%
filter(Net_CG_amt > 0) %>%
mutate(marginal_rate = income_tax(Taxable_Income + 1, fy.year = FY.YEAR) - income_tax(Taxable_Income, fy.year = FY.YEAR)) %$%
weighted.mean(marginal_rate, Net_CG_amt)
# Marginal rate on taxable income with the net capital gain removed, for imputed ages 20+,
# split by whether the imputed age lies between 20 and 65.
avg_marginal_rate_b4_CG <-
sample_file %>%
filter(Net_CG_amt > 0,
age_imp >= 20) %>%
mutate(Taxable_Income_b4_CG = pmaxC(Taxable_Income - Net_CG_amt, 0),
marginal_rate_b4_CG = income_tax(Taxable_Income_b4_CG + 1, fy.year = FY.YEAR) - income_tax(Taxable_Income_b4_CG, fy.year = FY.YEAR)) %>%
mutate(is_in_workforce = between(age_imp, 20, 65)) %>%
group_by(is_in_workforce) %>%
summarise(avg_marginal_rate_weighted = weighted.mean(marginal_rate_b4_CG, Net_CG_amt),
avg_marginal_Rate = mean(marginal_rate_b4_CG))
# "Apparent discount" = 1 - net capital gain / total current-year capital gain.
# It is computed for every row; rows with no total capital gain are filtered out afterwards.
# Share of rows with a positive total gain whose apparent discount is exactly 0.
prop_no_CGT_discount <-
sample_file %>%
mutate(apparent_discount = 1 - Net_CG_amt / Tot_CY_CG_amt) %>%
filter(Tot_CY_CG_amt > 0) %$%
mean(apparent_discount == 0)
# Share whose apparent discount is exactly 1 (net capital gain of zero).
prop_100pc_CGT_discount <-
sample_file %>%
mutate(apparent_discount = 1 - Net_CG_amt / Tot_CY_CG_amt) %>%
filter(Tot_CY_CG_amt > 0) %$%
mean(apparent_discount == 1)
# Share whose apparent discount lies between 45% and 55%.
prop_50pc_CGT_discount <-
sample_file %>%
mutate(apparent_discount = 1 - Net_CG_amt / Tot_CY_CG_amt) %>%
filter(Tot_CY_CG_amt > 0) %$%
mean(between(apparent_discount, 0.45, 0.55))
# Share with no apparent discount, weighting each row by its total capital gain.
prop_no_CGT_discount_by_val <-
sample_file %>%
mutate(apparent_discount = 1 - Net_CG_amt / Tot_CY_CG_amt) %>%
filter(Tot_CY_CG_amt > 0) %$%
weighted.mean(apparent_discount == 0, Tot_CY_CG_amt)
# Distribution of the ratio of net to total capital gains, weighted by the value
# of total capital gains. Ratios are rounded to the nearest 1/cgt_ratio_res so
# that each bar covers an equal-width bin.
cgt_ratio_res <- 50
sample_file %>%
  select(Tot_CY_CG_amt, Net_CG_amt, WEIGHT) %>%
  filter(Tot_CY_CG_amt > 0) %>%
  mutate(net_to_total_ratio = Net_CG_amt / Tot_CY_CG_amt) %>%
  mutate(net_to_total_ratio_round = round(net_to_total_ratio * cgt_ratio_res, 0) / cgt_ratio_res) %>%
  group_by(net_to_total_ratio_round) %>%
  # Convert to double before multiplying: the integer product overflowed and
  # produced NA bars that were then dropped from the chart.
  summarise(n_taxpayers = sum(WEIGHT),
            n_taxpayers_by_val = sum(as.numeric(WEIGHT) * as.numeric(Tot_CY_CG_amt))) %>%
  rename(`Ratio of Net capital gains to Total capital gains` = net_to_total_ratio_round) %>%
  ggplot(aes(x = `Ratio of Net capital gains to Total capital gains`, y = n_taxpayers_by_val)) +
  geom_bar(stat = "identity", width = 1/cgt_ratio_res) +
  theme(axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank())
## Warning in sum(WEIGHT * Tot_CY_CG_amt): integer overflow - use
## sum(as.numeric(.))
## Warning in sum(WEIGHT * Tot_CY_CG_amt): integer overflow - use
## sum(as.numeric(.))
## Warning: Removed 2 rows containing missing values (position_stack).
Taxable capital gains are typically realized later in life. This is unsurprising: a capital gain can only be realized when one has an asset to sell. Further, the capital gains tax makes the sale of assets less attractive when incomes are high. Taxpayers in their twenties have a 1.28% chance of incurring capital gains tax, whereas 11.8% of those of retirement age have capital gains. The chart "Incidence of capital gains" below shows that although capital gains have been more common among older taxpayers, the age skew is slightly more pronounced in 2013-14 than in previous years.
The average marginal tax rate of those with capital gains tax was 29.1%; however, this weights an individual with a capital gain of $1 equally as someone with a capital gain of $500,000. Weighting by the value of capital gain, the average marginal tax rate was 40.6%.
The net capital gains figure includes the CGT discount (and other discounts) applied to: \[\text{Total capital gains} - \text{Total capital losses (incl. from prev. years)}\] Comparing the ratio of net capital gains to total capital gains can shed some light on the value of the discount and the impact of capital losses on tax and tax revenue. Of those with nonzero total capital gains, 8.79% had no discount and 28.5% paid no tax (or a 100% discount). Some 45.3% had net capital gains of around 50% of their total gains. Weighting by the value of total capital gains instead gives the share of capital gains that is taxed at the full marginal rate. The bar chart above shows the distribution of this ratio. The deviation from 50% is due to some gains being realized within 12 months and (more commonly) capital losses.
# Capital gains summary by tax bracket:
#   n_taxpayers  weighted number of taxpayers
#   n_CG         weighted number with a positive net capital gain
#   val_CG       weighted total of current-year capital gains
#   total_CGT    weighted tax attributable to net capital gains (tax on taxable
#                income minus tax with the net capital gain removed)
CG_descriptive_by_bracket <-
  sample_file %>%
  mutate(tax = income_tax(Taxable_Income, fy.year = FY.YEAR),
         tax_wo_CG = income_tax(pmaxC(Taxable_Income - Net_CG_amt, 0), fy.year = FY.YEAR)) %>%
  group_by(Tax_Bracket) %>%
  summarise(n_taxpayers = sum(WEIGHT),
            n_CG = sum(WEIGHT[Net_CG_amt > 0]),
            # Convert to double before multiplying: the integer product
            # overflowed and left NA in most brackets.
            val_CG = sum(as.numeric(Tot_CY_CG_amt) * as.numeric(WEIGHT)),
            total_CGT = sum((tax - tax_wo_CG) * WEIGHT)) %>%
  ungroup() %>%
  arrange(Tax_Bracket)
## Warning in sum(Tot_CY_CG_amt * WEIGHT): integer overflow - use
## sum(as.numeric(.))
## Warning in sum(Tot_CY_CG_amt * WEIGHT): integer overflow - use
## sum(as.numeric(.))
## Warning in sum(Tot_CY_CG_amt * WEIGHT): integer overflow - use
## sum(as.numeric(.))
## Warning in sum(Tot_CY_CG_amt * WEIGHT): integer overflow - use
## sum(as.numeric(.))
# Print the capital gains summary as a table with display-formatted columns.
CG_descriptive_by_bracket %>%
  # cosmetic
  mutate(`Taxpayers` = comma(n_taxpayers),
         `with CG` = comma(n_CG),
         `Total cap. gains ($)` = grattan_dollar(val_CG),
         `Total CGT ($)` = grattan_dollar(total_CGT)) %>%
  select(`Tax bracket` = Tax_Bracket,
         `Taxpayers`, `with CG`, `Total cap. gains ($)`, `Total CGT ($)`) %>%
  # One "r" per column: the alignment string previously had six entries for
  # five columns.
  kable(align = paste0(rep("r", ncol(.)), collapse = ""))
Tax bracket | Taxpayers | with CG | Total cap. gains ($) | Total CGT ($) |
---|---|---|---|---|
$0-$18,200 | 2,512,650 | 84,000 | NA | $0 |
$18,201-$37,000 | 3,101,900 | 118,300 | $1,958,287,950 | $64,130,858 |
37,001-$80,000 | 4,808,650 | 185,600 | NA | $413,550,412 |
$80,001-$180,000 | 2,135,400 | 151,350 | NA | $965,377,002 |
$180,000+ | 380,100 | 61,150 | NA | $3,314,621,140 |
# Smoothed share of taxpayers with a positive net capital gain, by imputed age.
# The 0/1 indicator is smoothed, so the fitted curve reads as a proportion.
sample_file %>%
ggplot(aes(x = age_imp, y = as.numeric(Net_CG_amt > 0))) +
geom_smooth(color = viridis(1), size = 1.2) +
scale_y_continuous(label = percent)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Incidence of capital gains by imputed age, one smoothed line per tax year.
sample_files_all %>%
select(age_range, Net_CG_amt, fy.year) %>%
merge(age_range_decoder, by = "age_range") %>%
group_by(age_range_description) %>%
# Re-impute a continuous age for every year's rows (same rule as for sample_file:
# band lower bound plus a uniform draw over five years).
mutate(min_age = ifelse(grepl("to", age_range_description),
as.numeric(gsub("^([0-9]{2}).*$", "\\1", age_range_description)),
ifelse(grepl("70", age_range_description),
70,
15)),
max_age = min_age + 5,
age_imp = runif(n(), min_age, max_age)) %>%
select(-min_age, -max_age) %>%
# NOTE(review): last_fy is computed here but not referenced again in this chunk.
mutate(last_fy = fy.year == max(fy.year)) %>%
mutate(`Tax year` = factor(fy.year)) %>%
group_by(`Tax year`) %>%
# Only the row with the largest imputed age in each tax year carries a label;
# the label is placed at the mean incidence among imputed ages over 71.
mutate(label = if_else(age_imp == max(age_imp), fy.year, NA_character_),
is_CG = Net_CG_amt > 0,
label.y = mean(is_CG[age_imp > 71]),
Age = age_imp) %>%
{
ggplot(., aes(x = Age,
y = as.numeric(is_CG),
color = `Tax year`,
group = `Tax year`)) +
scale_y_continuous(label = percent) +
ggtitle("Incidence of capital gains") +
scale_color_viridis(discrete = TRUE) +
geom_line(stat = "smooth", method = "auto", se = FALSE, size = 1.2) +
# NOTE(review): the rendered output warns that hjust and vjust are unknown parameters here.
geom_label_repel(aes(label = label, y = label.y),
fill = NA,
nudge_x = 1,
hjust = 0,
vjust = 0,
fontface = "bold",
na.rm = TRUE) +
# Invisible point at x = 80, presumably to widen the x range and leave room for the labels.
annotate("blank",
x = 80, y = 0) +
theme_dark() +
theme(axis.title.y = element_blank())
}
## Warning: Ignoring unknown parameters: hjust, vjust
## Warning: Ignoring unknown aesthetics: x, y
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Mean net capital gain by (imputed, rounded) age for each tax year, among rows
# with a positive net capital gain; one smoothed line per tax year.
set.seed(24841)  # fixes the random age imputation below
sample_files_all %>%
  select(age_range, Net_CG_amt, fy.year) %>%
  merge(age_range_decoder, by = "age_range") %>%
  group_by(age_range_description) %>%
  # Continuous age: band lower bound plus a uniform draw over five years.
  mutate(min_age = ifelse(grepl("to", age_range_description),
                          as.numeric(gsub("^([0-9]{2}).*$", "\\1", age_range_description)),
                          ifelse(grepl("70", age_range_description),
                                 70,
                                 15)),
         max_age = min_age + 5,
         age_imp = runif(n(), min_age, max_age)) %>%
  select(-min_age, -max_age) %>%
  filter(Net_CG_amt > 0) %>%
  mutate(Age = round(age_imp)) %>%
  group_by(fy.year, Age) %>%
  summarise(mean_Net_CG = mean(Net_CG_amt),
            sd_Net_CG = sd(Net_CG_amt)) %>%
  ungroup() %>%
  # TRUE for the two most recent tax years.
  mutate(last_fy = fy.year == max(fy.year) | fy.year == max(fy.year[fy.year != max(fy.year)])) %>%
  group_by(fy.year) %>%
  # Label only the oldest age in each year, placed at the mean over ages above 70.
  mutate(label = ifelse(Age == max(Age), fy.year, NA_character_),
         label.y = mean(mean_Net_CG[Age > 70])) %>%
  {
    ggplot(., aes(x = Age, y = mean_Net_CG, color = factor(fy.year), group = factor(fy.year))) +
      scale_y_continuous(labels = dollar) +
      scale_color_viridis(discrete = TRUE) +
      geom_line(stat = "smooth", method = "auto", se = FALSE, size = 1.2) +
      # Removed two no-ops that only produced warnings: `nudge_x` passed as an
      # aesthetic (ignored by geom_text) and an alpha scale with no alpha mapping.
      # NOTE(review): last_fy is logical, so the %in% test against year strings is
      # always FALSE and every label gets the same size. Left unchanged because the
      # intended sizes are unclear — confirm whether `if_else(last_fy, 2, 1)` was meant.
      geom_text(aes(label = label, y = label.y, size = if_else(last_fy %in% c("2012-13", "2013-14"), 2, 1)),
                hjust = 0,
                vjust = 0,
                fontface = "bold",
                na.rm = TRUE) +
      scale_x_continuous(expand = c(0,0)) +
      theme_dark() +
      # Invisible point at x = 85, presumably to leave room for the labels.
      annotate("blank",
               x = 85, y = 0) +
      theme(axis.title.y = element_blank(),
            plot.margin = unit(c(0,0,5,0), "pt"))
  }
## Warning: Using alpha for a discrete variable is not advised.
## Warning: Ignoring unknown aesthetics: nudge_x
## Warning: Ignoring unknown aesthetics: x, y
## `geom_smooth()` using method = 'loess' and formula 'NULL'
# Age distribution of taxpayers with a positive net capital gain, for the 1st, 5th and
# 10th deciles of taxable income recomputed without the capital gain.
sample_file %>%
# Taxable income excluding CG: total income less the net capital gain, less deductions
# and the two loss-claimed columns, passed through pmaxC(., 0).
mutate(Tot_inc_amt_noCG = Tot_inc_amt - Net_CG_amt,
Taxable_Income_noCG = pmaxC(Tot_inc_amt_noCG - Tot_ded_amt - NPP_loss_claimed - PP_loss_claimed, 0)) %>%
# Deciles are assigned over all rows, before restricting to those with a capital gain.
mutate(Taxable_Income_noCG_decile = ntile(Taxable_Income_noCG, 10)) %>%
filter(Taxable_Income_noCG_decile %in% c(1, 5, 10)) %>%
filter(Net_CG_amt > 0) %>%
rename(Age = age_imp) %>%
mutate(`Taxable Income\n(excl CG) decile` = factor(Taxable_Income_noCG_decile)) %>%
ggplot(aes(x = Age, fill = `Taxable Income\n(excl CG) decile`)) +
geom_density(size = 1.5, alpha = 0.7) +
scale_fill_viridis(discrete = TRUE) +
theme(legend.position = "right")
# The text annotations below sit at hand-picked coordinates, so refuse to run for any other year.
if (FY.YEAR != "2013-14"){
stop("Check annotations in this chart before compiling")
}
# Marginal tax rate by imputed age among those with a positive net capital gain (imputed age over 20).
sample_file %>%
filter(Net_CG_amt > 0, age_imp > 20) %>%
# Marginal rate = extra income tax on one more dollar of taxable income.
mutate(marginal_rate = income_tax(Taxable_Income + 1, fy.year = FY.YEAR) - income_tax(Taxable_Income, fy.year = FY.YEAR)) %>%
rename(Age = age_imp) %>%
ggplot(aes(x = Age, y = marginal_rate)) +
scale_y_continuous(label = percent) +
# First viridis colour: every row weighted equally. Second: rows weighted by net capital gain.
geom_smooth(aes(weight = 1), colour = viridis(2)[1], size = 1.2) +
geom_smooth(aes(weight = Net_CG_amt), colour = viridis(2)[2], size = 1.2) +
# Labels use the same two colours, in the same order as the curves above.
annotate("text",
x = c(57, 57),
y = c(0.335, 0.435),
label = c("Unweighted", "Weighted by CG amt"),
colour = viridis(2),
fontface = "bold",
hjust = 0) +
theme(axis.title.y = element_blank())
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# The text annotations below sit at hand-picked coordinates, so refuse to run
# for any other year.
if (FY.YEAR != "2013-14"){
  stop("Check annotations in this chart before compiling")
}
# Marginal tax rate on taxable income with the net capital gain removed, by
# imputed age, among those with a positive net capital gain (imputed age over 20).
sample_file %>%
  filter(Net_CG_amt > 0, age_imp > 20) %>%
  mutate(Taxable_Income_b4_CG = pmaxC(Taxable_Income - Net_CG_amt, 0),
         marginal_rate_b4_CG = income_tax(Taxable_Income_b4_CG + 1, fy.year = FY.YEAR) - income_tax(Taxable_Income_b4_CG, fy.year = FY.YEAR)) %>%
  rename(Age = age_imp) %>%
  ggplot(aes(x = Age, y = marginal_rate_b4_CG)) +
  scale_y_continuous(labels = percent) +
  # Curve colours now follow the label colours below (unweighted = first
  # viridis colour, weighted = second). They were previously swapped, so each
  # label was coloured like the other curve.
  geom_smooth(aes(weight = 1), colour = viridis(2)[1], size = 1.2) +
  geom_smooth(aes(weight = Net_CG_amt), colour = viridis(2)[2], size = 1.2) +
  annotate("text",
           x = c(31, 35),
           y = c(0.315, 0.225),
           label = c("Unweighted", "Weighted by CG amt"),
           colour = viridis(2),
           fontface = "bold",
           hjust = 0) +
  theme(axis.title.y = element_blank())
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Weighted number of taxpayers with positive gross rent.
n_prop_invstrs <-
sample_file %$%
sum((Gross_rent_amt > 0) * WEIGHT)
# Weighted number with a net rental loss (negative net rent).
n_NGs <-
sample_file %$%
sum((Net_rent_amt < 0) * WEIGHT)
# Weighted total of net rental losses, as a positive amount.
val_NG_losses <-
sample_file %$%
sum(abs(pminC(Net_rent_amt, 0) * WEIGHT))
# Weighted extra tax that would be due if net rental losses were added back to taxable income.
NG_tax_exp <-
sample_file %>%
mutate(tax = income_tax(Taxable_Income, fy.year = FY.YEAR),
new_tax = income_tax(Taxable_Income - pminC(Net_rent_amt, 0), fy.year = FY.YEAR),
diff = new_tax - tax) %$%
sum(diff * WEIGHT)
There were 2~million property investors. Of these, 1.25~million were negatively geared. Losses claimed totalled $10.7~billion. This delivered a tax expenditure (by revenue forgone) of $3.63~billion.
# Smoothed share of taxpayers with a net rental loss, by salary, for salaries from 0 to 250,000.
sample_file %>%
filter(between(Sw_amt, 0, 250e3)) %>%
rename(Salary = Sw_amt) %>%
# The 0/1 indicator is smoothed, so the fitted curve reads as a proportion.
ggplot(aes(x = Salary, y = as.numeric(Net_rent_amt < 0))) +
geom_smooth(colour = viridis(2)[2], size = 1.5) +
scale_y_continuous(label = percent) +
scale_x_continuous(label = dollar) +
theme(axis.title.y = element_blank())
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Negative gearing by tax bracket: weighted number of taxpayers with a net
# rental loss (n_NG) and weighted number of taxpayers (n) in each bracket.
NG_by_taxBracket <-
  sample_file %>%
  mutate(Tax_bracket = cut(Taxable_Income,
                           breaks = c(-Inf, 18200, 37e3, 80e3, 180e3, Inf),
                           labels = c("$0-$18,200", "$18,201-$37,000",
                                      "$37,001-$80,000", "$80,001-$180,000",
                                      "Over $180,000"),
                           # The argument is `ordered_result`; the misspelt
                           # `ordered_results` was silently ignored by cut().
                           ordered_result = TRUE,
                           include.lowest = TRUE)) %>%
  group_by(Tax_bracket) %>%
  summarise(n_NG = wsum(Net_rent_amt < 0, WEIGHT),
            n = sum(WEIGHT)) %>%
  arrange(Tax_bracket)
# Display table: count and share negatively gearing in each bracket.
NG_by_taxBracket %>%
  mutate(`Number negative gearing` = comma(n_NG),
         `\\%` = percent(n_NG / n)) %>%
  select(`Tax bracket` = Tax_bracket,
         `Number negative gearing`,
         `\\%`) %>%
  kable(align = "rrr")
Tax bracket | Number negative gearing | % |
---|---|---|
$0-$18,200 | 161,800 | 6.4% |
$18,201-$37,000 | 180,950 | 5.8% |
$37,001-$80,000 | 469,500 | 9.8% |
$80,001-$180,000 | 350,800 | 16.4% |
Over $180,000 | 85,200 | 22.4% |
# Tax change from disallowing net rental losses, by tax bracket.
# Income "without NG" replaces a negative net rent with zero; `change` is the
# tax on that recomputed taxable income minus the tax currently payable.
# total_tax_change is weighted by WEIGHT; avg_tax_change is the unweighted mean.
NG_by_taxBracket_tax_benefit <-
  sample_file %>%
  mutate(Tot_inc_amt_NoNG = Tot_inc_amt - Net_rent_amt + pmaxC(Net_rent_amt, 0),
         Taxable_Income_noNG = pmaxC(Tot_inc_amt_NoNG - Tot_ded_amt - NPP_loss_claimed - PP_loss_claimed, 0),
         tax_current = income_tax(Taxable_Income, fy.year = FY.YEAR),
         tax_noNG = income_tax(Taxable_Income_noNG, fy.year = FY.YEAR),
         change = tax_noNG - tax_current) %>%
  mutate(Tax_bracket = cut(Taxable_Income,
                           breaks = c(-Inf, 18200, 37e3, 80e3, 180e3, Inf),
                           labels = c("$0-$18,200",
                                      "$18,201-$37,000",
                                      "$37,001-$80,000",
                                      "$80,001-$180,000",
                                      "Over $180,000"),
                           # The argument is `ordered_result`; the misspelt
                           # `ordered_results` was silently ignored by cut().
                           ordered_result = TRUE,
                           include.lowest = TRUE)) %>%
  group_by(Tax_bracket) %>%
  summarise(total_tax_change = sum(change * WEIGHT),
            avg_tax_change = mean(change)) %>%
  arrange(Tax_bracket)
# Display table with dollar-formatted columns, all right-aligned.
NG_by_taxBracket_tax_benefit %>%
  mutate(`Total tax change` = grattan_dollar(total_tax_change),
         `Average tax change` = grattan_dollar(avg_tax_change)) %>%
  select(`Tax bracket` = Tax_bracket,
         `Total tax change`,
         `Average tax change`) %>%
  kable(align = paste0(rep("r", ncol(.)), collapse = ""))
Tax bracket | Total tax change | Average tax change |
---|---|---|
$0-$18,200 | $97,261,913 | $39 |
$18,201-$37,000 | $333,873,994 | $108 |
$37,001-$80,000 | $1,246,976,877 | $259 |
$80,001-$180,000 | $1,288,682,051 | $603 |
Over $180,000 | $648,280,796 | $1,706 |
# Negative gearing by taxable-income decile: weighted number of taxpayers with a
# net rental loss (n_NG) and weighted number of taxpayers (n) in each decile.
NG_by_taxable_income_decile <-
  sample_file %>%
  mutate(Taxable_Income_decile = ntile(Taxable_Income, 10)) %>%
  group_by(Taxable_Income_decile) %>%
  summarise(n_NG = wsum(Net_rent_amt < 0, WEIGHT),
            n = sum(WEIGHT)) %>%
  arrange(Taxable_Income_decile)
# Display table: count and share negatively gearing in each decile.
NG_by_taxable_income_decile %>%
  mutate(`Number negative gearing` = comma(n_NG),
         `\\%` = percent(n_NG / n)) %>%
  mutate(`Taxable Income decile` = factor(Taxable_Income_decile)) %>%
  select(`Taxable Income decile`,
         `Number negative gearing`,
         `\\%`) %>%
  # Three columns, so three alignment entries (previously four).
  kable(align = "rrr")
Taxable Income decile | Number negative gearing | % |
---|---|---|
1 | 109,550 | 8.5% |
2 | 55,750 | 4.3% |
3 | 64,950 | 5.0% |
4 | 82,400 | 6.4% |
5 | 93,700 | 7.2% |
6 | 106,600 | 8.2% |
7 | 130,750 | 10.1% |
8 | 157,950 | 12.2% |
9 | 192,600 | 14.9% |
10 | 254,000 | 19.6% |
# Share of the total negative gearing tax change falling in each decile of taxable income.
# Income "without NG" replaces a negative net rent with zero; `change` is the tax on that
# recomputed taxable income minus the tax currently payable.
NG_tax_benefit_taxable_income_decile <-
sample_file %>%
mutate(Tot_inc_amt_NoNG = Tot_inc_amt - Net_rent_amt + pmaxC(Net_rent_amt, 0),
Taxable_Income_noNG = pmaxC(Tot_inc_amt_NoNG - Tot_ded_amt - NPP_loss_claimed - PP_loss_claimed, 0),
tax_current = income_tax(Taxable_Income, fy.year = FY.YEAR),
tax_noNG = income_tax(Taxable_Income_noNG, fy.year = FY.YEAR),
change = tax_noNG - tax_current) %>%
# Deciles of taxable income as reported.
mutate(Taxable_Income_decile = ntile(Taxable_Income, 10)) %>%
group_by(Taxable_Income_decile) %>%
summarise(tax_diff = sum(change * WEIGHT)) %>%
ungroup %>%
# Each decile's share of the total weighted tax change.
mutate(tax_diff_prop = tax_diff / sum(tax_diff)) %>%
arrange(Taxable_Income_decile) %>%
mutate(decile_by = "Taxable income")
# The same calculation, with deciles formed on taxable income before negative gearing.
NG_tax_benefit_taxable_income_decile_noNG <-
sample_file %>%
mutate(Tot_inc_amt_NoNG = Tot_inc_amt - Net_rent_amt + pmaxC(Net_rent_amt, 0),
Taxable_Income_noNG = pmaxC(Tot_inc_amt_NoNG - Tot_ded_amt - NPP_loss_claimed - PP_loss_claimed, 0),
tax_current = income_tax(Taxable_Income, fy.year = FY.YEAR),
tax_noNG = income_tax(Taxable_Income_noNG, fy.year = FY.YEAR),
change = tax_noNG - tax_current) %>%
# Deciles of the recomputed (no-NG) taxable income.
mutate(Taxable_Income_decile = ntile(Taxable_Income_noNG, 10)) %>%
group_by(Taxable_Income_decile) %>%
summarise(tax_diff = sum(change * WEIGHT)) %>%
ungroup %>%
mutate(tax_diff_prop = tax_diff / sum(tax_diff)) %>%
arrange(Taxable_Income_decile) %>%
mutate(decile_by = "Taxable income before NG")
# Share of the negative gearing tax change by decile, one facet per decile definition.
bind_rows("Current" = NG_tax_benefit_taxable_income_decile,
          "Before NG" = NG_tax_benefit_taxable_income_decile_noNG) %>%
  mutate(`Taxable income decile` = factor(Taxable_Income_decile)) %>%
  ggplot(aes(x = `Taxable income decile`, y = tax_diff_prop, fill = decile_by)) +
  geom_bar(stat = "identity") +
  facet_grid(~decile_by) +
  # Round the upper limit UP to the next 10%. round() could land below the
  # tallest bar (e.g. 0.34 -> 0.3), and bars beyond the limits are dropped.
  scale_y_continuous(labels = percent,
                     expand = c(0,0),
                     limits = c(0, ceiling(10 * max(c(NG_tax_benefit_taxable_income_decile_noNG$tax_diff_prop,
                                                      NG_tax_benefit_taxable_income_decile$tax_diff_prop))) / 10))
# Previous financial year: share of the total negative gearing tax change
# falling in each decile of taxable income.
# Tax is computed under the previous year's rules (PREV.FY.YEAR) to match the
# previous-year sample; it previously used FY.YEAR.
NG_tax_benefit_taxable_income_decile_prev <-
  sample_file_prev %>%
  mutate(Tot_inc_amt_NoNG = Tot_inc_amt - Net_rent_amt + pmaxC(Net_rent_amt, 0),
         Taxable_Income_noNG = pmaxC(Tot_inc_amt_NoNG - Tot_ded_amt - NPP_loss_claimed - PP_loss_claimed, 0),
         tax_current = income_tax(Taxable_Income, fy.year = PREV.FY.YEAR),
         tax_noNG = income_tax(Taxable_Income_noNG, fy.year = PREV.FY.YEAR),
         change = tax_noNG - tax_current) %>%
  mutate(Taxable_Income_decile = ntile(Taxable_Income, 10)) %>%
  group_by(Taxable_Income_decile) %>%
  summarise(tax_diff = sum(change * WEIGHT)) %>%
  ungroup() %>%
  # Each decile's share of the total weighted tax change.
  mutate(tax_diff_prop = tax_diff / sum(tax_diff)) %>%
  arrange(Taxable_Income_decile) %>%
  mutate(decile_by = "Taxable income")
# Previous financial year: share of the total negative gearing tax change by
# decile of taxable income BEFORE negative gearing.
# Two fixes: tax uses the previous year's rules (PREV.FY.YEAR, not FY.YEAR),
# and deciles are formed on Taxable_Income_noNG to match the "before NG" label
# and the current-year equivalent (they were formed on Taxable_Income).
NG_tax_benefit_taxable_income_decile_noNG_prev <-
  sample_file_prev %>%
  mutate(Tot_inc_amt_NoNG = Tot_inc_amt - Net_rent_amt + pmaxC(Net_rent_amt, 0),
         Taxable_Income_noNG = pmaxC(Tot_inc_amt_NoNG - Tot_ded_amt - NPP_loss_claimed - PP_loss_claimed, 0),
         tax_current = income_tax(Taxable_Income, fy.year = PREV.FY.YEAR),
         tax_noNG = income_tax(Taxable_Income_noNG, fy.year = PREV.FY.YEAR),
         change = tax_noNG - tax_current) %>%
  mutate(Taxable_Income_decile = ntile(Taxable_Income_noNG, 10)) %>%
  group_by(Taxable_Income_decile) %>%
  summarise(tax_diff = sum(change * WEIGHT)) %>%
  ungroup() %>%
  # Each decile's share of the total weighted tax change.
  mutate(tax_diff_prop = tax_diff / sum(tax_diff)) %>%
  arrange(Taxable_Income_decile) %>%
  mutate(decile_by = "Taxable income before NG")
# Share of the negative gearing tax change by decile, comparing the current and
# previous financial years (dodged bars) within each decile definition (facets).
bind_rows("Current" = NG_tax_benefit_taxable_income_decile,
          "Before NG" = NG_tax_benefit_taxable_income_decile_noNG,
          "Current (prev fy)" = NG_tax_benefit_taxable_income_decile_prev,
          "Before NG (prev fy)" = NG_tax_benefit_taxable_income_decile_noNG_prev,
          .id = "df_id") %>%
  mutate(`Taxable income decile` = factor(Taxable_Income_decile)) %>%
  # The source table's name tells us which financial year a row belongs to.
  mutate(financial_year = ifelse(grepl("prev fy", df_id), PREV.FY.YEAR, FY.YEAR)) %>%
  ggplot(aes(x = `Taxable income decile`, y = tax_diff_prop, fill = financial_year)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_viridis(discrete = TRUE, begin = 0, end = 0.3333) +
  facet_grid(~decile_by) +
  # The upper limit covers all four tables and is rounded UP to the next 10%,
  # so no bar can fall outside the limits and be dropped.
  scale_y_continuous(labels = percent,
                     expand = c(0,0),
                     limits = c(0, ceiling(10 * max(c(NG_tax_benefit_taxable_income_decile_noNG$tax_diff_prop,
                                                      NG_tax_benefit_taxable_income_decile$tax_diff_prop,
                                                      NG_tax_benefit_taxable_income_decile_noNG_prev$tax_diff_prop,
                                                      NG_tax_benefit_taxable_income_decile_prev$tax_diff_prop))) / 10)) +
  # legend.margin must be given with margin(); unit() triggered a warning.
  theme(legend.margin = margin(0, 0, 0, 0),
        legend.title = element_blank(),
        legend.position = c(0.00, 1.025),
        legend.background = element_blank(),
        legend.justification = c(0, 1),
        axis.title.y = element_blank(),
        strip.background = element_rect(color = grey(0.8), fill = grey(0.8)),
        strip.text = element_text(colour = "white", face = "bold"))
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
# Smoothed share of taxpayers with a net rental loss, by imputed age:
# first curve for all taxpayers, second for those with positive gross rent.
p <-
  ggplot(NULL) +
  geom_smooth(data = sample_file,
              aes(x = age_imp, y = as.numeric(Net_rent_amt < 0)),
              colour = viridis(2)[1],
              size = 1.2) +
  geom_smooth(data = filter(sample_file, Gross_rent_amt > 0),
              aes(x = age_imp,
                  y = as.numeric(Net_rent_amt < 0)),
              colour = viridis(2)[2],
              size = 1.2) +
  scale_y_continuous(labels = percent) +
  xlab("Age") +
  coord_cartesian(ylim = c(0,1)) +
  theme(axis.title.y = element_blank())
# The label coordinates were chosen by hand for 2013-14 only.
if (FY.YEAR == "2013-14"){
  p <-
    p +
    annotate("text",
             x = c(38, 38),
             # A stray `y =` name inside c() has been removed.
             y = c(0.18, 0.80),
             label = c("All taxpayers", "Property investors"),
             hjust = c(0.5, 0),
             colour = viridis(2),
             fontface = "bold")
} else {
  p <- p +
    theme(legend.position = "right")
}
p
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Heat map of the proportion of taxpayers with a net rental loss, by imputed age
# (rounded to age_res years) and total income before negative gearing (rounded
# to inc_res dollars).
age_res <- 1
inc_res <- 10000
# Income "without NG" replaces a negative net rent with zero; `change` is the
# tax on the recomputed taxable income minus the tax currently payable.
ng_by_age_income <-
  sample_file %>%
  mutate(Tot_inc_amt_NoNG = Tot_inc_amt - Net_rent_amt + pmaxC(Net_rent_amt, 0),
         Taxable_Income_noNG = pmaxC(Tot_inc_amt_NoNG - Tot_ded_amt - NPP_loss_claimed - PP_loss_claimed, 0),
         tax_current = income_tax(Taxable_Income, fy.year = FY.YEAR),
         tax_noNG = income_tax(Taxable_Income_noNG, fy.year = FY.YEAR),
         change = tax_noNG - tax_current)
# 95th percentile of positive incomes. This is now an ordinary top-level
# assignment instead of a `<<-` buried inside the filter() call.
upper_ylim <- quantile(ng_by_age_income$Tot_inc_amt_NoNG[ng_by_age_income$Tot_inc_amt_NoNG > 0],
                       probs = 0.95)
ng_by_age_income %>%
  # This excludes income losses (barely any anyway)
  # and high income earners
  filter(between(Tot_inc_amt_NoNG, 0, upper_ylim)) %>%
  mutate(Age = age_res * round(age_imp / age_res),
         `Total Income (before NG)` = inc_res * round(Tot_inc_amt_NoNG / inc_res)) %>%
  group_by(Age, `Total Income (before NG)`) %>%
  summarise(n_NG = sum((Net_rent_amt < 0) * WEIGHT),
            prop_NG = mean(Net_rent_amt < 0),
            tot_tax_benefit = sum(change * WEIGHT),
            avg_tax_benefit = mean(change)) %>%
  ungroup() %>%
  ggplot(aes(x = Age, y = `Total Income (before NG)`, fill = prop_NG)) +
  geom_bin2d(stat = "identity") +
  scale_fill_viridis("% NG", labels = percent) +
  scale_y_continuous(expand = c(0,0), labels = grattan_dollar) +
  scale_x_continuous(expand = c(0,0)) +
  theme_dark() +
  theme(legend.title = element_blank(),
        plot.margin = unit(c(0,0,0,0), "pt"))# %>%
#align_baptiste(.)
# Heat map of the average tax change from disallowing net rental losses, by
# imputed age (rounded to age_res years) and total income before negative
# gearing (rounded to inc_res dollars).
age_res <- 1
inc_res <- 10000
# Income "without NG" replaces a negative net rent with zero; `change` is the
# tax on the recomputed taxable income minus the tax currently payable.
ng_by_age_income <-
  sample_file %>%
  mutate(Tot_inc_amt_NoNG = Tot_inc_amt - Net_rent_amt + pmaxC(Net_rent_amt, 0),
         Taxable_Income_noNG = pmaxC(Tot_inc_amt_NoNG - Tot_ded_amt - NPP_loss_claimed - PP_loss_claimed, 0),
         tax_current = income_tax(Taxable_Income, fy.year = FY.YEAR),
         tax_noNG = income_tax(Taxable_Income_noNG, fy.year = FY.YEAR),
         change = tax_noNG - tax_current)
# 95th percentile of positive incomes. This is now an ordinary top-level
# assignment instead of a `<<-` buried inside the filter() call.
upper_ylim <- quantile(ng_by_age_income$Tot_inc_amt_NoNG[ng_by_age_income$Tot_inc_amt_NoNG > 0],
                       probs = 0.95)
ng_by_age_income %>%
  # This excludes income losses (barely any anyway)
  # and high income earners
  filter(between(Tot_inc_amt_NoNG, 0, upper_ylim)) %>%
  mutate(Age = age_res * round(age_imp / age_res),
         `Total Income (before NG)` = inc_res * round(Tot_inc_amt_NoNG / inc_res)) %>%
  group_by(Age, `Total Income (before NG)`) %>%
  summarise(n_NG = sum((Net_rent_amt < 0) * WEIGHT),
            prop_NG = mean(Net_rent_amt < 0),
            tot_tax_benefit = sum(change * WEIGHT),
            avg_tax_benefit = mean(change)) %>%
  ungroup() %>%
  ggplot(aes(x = Age, y = `Total Income (before NG)`, fill = avg_tax_benefit)) +
  geom_bin2d(stat = "identity") +
  scale_fill_viridis("Tax benefit", labels = grattan_dollar) +
  scale_y_continuous(expand = c(0,0), labels = grattan_dollar) +
  scale_x_continuous(expand = c(0,0)) +
  theme_dark() +
  theme(legend.title = element_blank())