/*
 * Draws the "High privacy and accuracy requires more training data" chart
 * into `.accuracy-v-privacy-dataset_size`.
 *
 * Steps:
 *   1. Load the accuracy grid and keep rows inside `util.epsilonExtent` with
 *      more than 1,000 training points.
 *   2. Group rows by `dataset_size`; give each group a color and a label spec.
 *   3. Draw axes (log-scaled epsilon on x, percent accuracy on y), one line
 *      per group, one circle per row, per-group labels and two annotations.
 *   4. Hovering a line or circle marks that dataset size as `active`.
 *
 * Relies on the page-level globals `d3` (with the jetpack-style helpers used
 * below: nestBy, conventions, appendMany, st/at, translate, tspans, wordwrap)
 * and `util`; neither is defined in this file.
 */
!(async function(){
  const allRows = await util.getFile('cns-cache/model_grid_test_accuracy.json')

  // The filter below treats index 0 of the extent as the upper epsilon bound
  // and index 1 as the lower bound.
  const [upperEpsilon, lowerEpsilon] = util.epsilonExtent
  const data = allRows
    .filter(d => lowerEpsilon <= d.epsilon && d.epsilon <= upperEpsilon)
    .filter(d => d.dataset_size > 1000)

  // Label offset/anchor/text for each dataset size that gets an inline label.
  // Sizes missing from this table get no label text.
  const labelBySize = {
    60000: {pos: [7, 11], textAnchor: 'middle', text: '60,000'},
    30000: {pos: [7, 11], textAnchor: 'middle', text: '30,000'},
    15000: {pos: [7, -5], textAnchor: 'start', text: '15,000'},
    7500: {pos: [0, 8], textAnchor: 'start', text: '7,500'},
    3750: {pos: [-34, 10], textAnchor: 'start', text: '3,750'},
    2000: {pos: [-50, 10], textAnchor: 'end', text: '2,000 training points'},
  }

  const bySize = d3.nestBy(data, d => d.dataset_size)
  bySize.forEach((sizeGroup, i) => {
    sizeGroup.dataset_size = sizeGroup.key
    sizeGroup.color = d3.interpolatePlasma(.84 - i/6)

    // NOTE(review): nest keys are presumably strings, so coerce before
    // comparing against numeric sizes instead of relying on loose equality.
    const size = Number(sizeGroup.key)

    // Tint the `.tp60` / `.tp75` elements found elsewhere on the page with
    // the color of the matching line.
    if (size === 60000){
      d3.selectAll('.tp60').st({background: sizeGroup.color, padding: 2})
    }
    if (size === 7500){
      d3.selectAll('.tp75').st({background: sizeGroup.color, color: '#fff', padding: 2})
    }

    sizeGroup.label = labelBySize[sizeGroup.key]

    // Back-reference so each circle can read its group's color.
    sizeGroup.forEach(row => row.size = sizeGroup)
  })

  const sel = d3.select('.accuracy-v-privacy-dataset_size').html('')
    .at({role: 'graphics-document', 'aria-label': `High privacy and accuracy requires more training data. Line chart showing too much differential privacy without enough data decreases accuracy.`})

  sel.append('div.chart-title').text('High privacy and accuracy requires more training data')

  const c = d3.conventions({
    sel,
    height: 400,
    margin: {bottom: 125, top: 5},
    layers: 'sd',
  })

  // Swap the default x scale for a log scale over the epsilon extent,
  // keeping the pixel range that conventions computed.
  c.x = d3.scaleLog().domain(util.epsilonExtent).range(c.x.range())
  c.xAxis = d3.axisBottom(c.x).tickFormat(tick => {
    // Only label ticks whose first character other than '0' and '.' is '1';
    // every other tick gets no text.
    const tickStr = tick + ''
    const leadingDigit = tickStr.split('').find(ch => ch !== '0' && ch !== '.')
    if (leadingDigit === '1') return tickStr
  })

  // Build the percent formatter once rather than on every tick.
  const formatPercent = d3.format('.0%')
  c.yAxis.tickFormat(d => formatPercent(d))

  d3.drawAxis(c)
  util.addAxisLabel(c, 'Higher Privacy →', 'Test Accuracy')
  util.ggPlotBg(c, false)

  // Explanatory caption, 240px wide, centered under the x axis.
  c.layers[1].append('div')
    .st({fontSize: 12, color: '#555', width: 120*2, textAlign: 'center', lineHeight: '1.3em'})
    .translate([c.width/2 - 120, c.height + 70])
    .html('in ε, a <a href="https://desfontain.es/privacy/differential-privacy-in-more-detail.html">measure</a> of how much modifying a single training point can change the model (models with a lower ε are more private)')

  // Emphasize the 90% tick: bold its text and add a dashed rule across the chart.
  c.svg.selectAll('.y .tick').filter(d => d === .9)
    .select('text').st({fontWeight: 600}).parent()
    .append('path')
    .at({stroke: '#000', strokeDasharray: '2 2', d: 'M 0 0 H ' + c.width})

  const line = d3.line()
    .x(d => c.x(d.epsilon))
    .y(d => c.y(d.accuracy))
    .curve(d3.curveMonotoneX)

  // One path per dataset size.
  const lineSel = c.svg.append('g').appendMany('path.accuracy-line', bySize)
    .at({
      d: line,
      fill: 'none',
    })
    .st({stroke: d => d.color})
    .on('mousemove', setActiveDigit)

  // One circle per data row, colored by its group.
  const circleSel = c.svg.append('g')
    .appendMany('g.accuracy-circle', data)
    .translate(d => [c.x(d.epsilon), c.y(d.accuracy)])
    .on('mousemove', setActiveDigit)

  circleSel.append('circle')
    .at({r: 4, stroke: '#fff'})
    .st({fill: d => d.size.color})

  // Labels are anchored at the first row of each group.
  const labelSel = c.svg.appendMany('g.accuracy-label', bySize)
    .translate(d => [c.x(d[0].epsilon), c.y(d[0].accuracy)])

  labelSel.append('text')
    .filter(d => d.label)
    .translate(d => d.label.pos)
    .st({fill: d => d.color, fontWeight: 400})
    // The inline `fill` style above takes precedence over the `fill`
    // attribute set here; the attribute is kept so the DOM stays unchanged.
    .at({textAnchor: d => d.label.textAnchor, fontSize: 14, fill: '#000', dy: '.66em'})
    .text(d => d.label.text)
    // The 2,000 label is re-rendered with one tspan per word.
    .filter(d => Number(d.key) === 2000)
    .text('')
    .tspans(d => d.label.text.split(' '))

  c.svg.append('text.annotation')
    .translate([225, 106])
    .tspans(d3.wordwrap('With limited data, adding more differential privacy improves accuracy...', 25), 12)

  c.svg.append('text.annotation')
    .translate([490, 230])
    .tspans(d3.wordwrap(`...until it doesn't`, 20))

  /**
   * Mousemove handler: marks the line, circles and label belonging to the
   * hovered datum's dataset size with the `active` class (clearing it from
   * all others) and raises the active line and circles.
   *
   * @param {{dataset_size: (number|string)}} datum - bound datum of the
   *   hovered element: a size group for lines, a data row for circles.
   */
  function setActiveDigit({dataset_size}){
    // Groups carry the nest key while rows carry the raw value, so both
    // sides are coerced to numbers before a strict comparison.
    const activeSize = Number(dataset_size)
    const isActiveSize = d => Number(d.dataset_size) === activeSize

    lineSel
      .classed('active', false)
      .filter(isActiveSize)
      .classed('active', true)
      .raise()

    circleSel
      .classed('active', false)
      .filter(isActiveSize)
      .classed('active', true)
      .raise()

    // Labels are highlighted but not raised.
    labelSel
      .classed('active', false)
      .filter(isActiveSize)
      .classed('active', true)
  }
})()