tcl
88 bytes, 6448 / 3233 collisions
I see people have been counting either the number of colliding words, or else the number of words placed in nonempty buckets. I give both counts - the first is according to the problem specification, and the second is what more posters have been reporting.
# 88 bytes, 6448 colliding words, 3233 words in nonempty buckets
# Print the size of the golfed definition of H. The braced literal below is a
# verbatim copy of the proc line that follows, so the two must be kept in sync.
# [string length] counts characters; the literal is pure ASCII, so that equals
# its byte count.
puts "[string length {proc H w {incr h;lmap c [split $w {}] {set h [expr (2551*$h+[scan $c %c])%2**24]};set h}}] bytes"
# H w -- hash the string w to an integer in the range 0 .. 2**24-1.
#   `incr h` creates the local h with value 1 (incr treats an unset variable
#   as 0), which is a short way to seed the hash.
#   For every character c of w:  h = (2551*h + codepoint(c)) mod 2**24.
#   lmap is used only as a compact loop; the list it builds is discarded.
#   Returns the final h (1 when w is the empty string).
proc H w {incr h;lmap c [split $w {}] {set h [expr (2551*$h+[scan $c %c])%2**24]};set h}
# change 2551 above to:
# 7: 85 bytes, 25839 colliding words, 13876 words in nonempty buckets
# 97: 86 bytes, 6541 colliding words, 3283 words in nonempty buckets
# 829: 87 bytes, 6471 colliding words, 3251 words in nonempty buckets
# validation program
# Validation: hash every non-empty line of the word list with H and report
# both collision metrics quoted for this answer:
#   - colliding words: words whose hash code is shared with another word
#   - words in nonempty buckets: words that arrived at an already-used code
# Note: Tcl 9 no longer expands a leading ~ in file names; spell out the home
# directory when running the script there.
set f [open ~/Downloads/british-english-huge.txt r]
set words [split [read $f] \n]
close $f
set have {}; # dictionary: hash code -> number of words that produced it
foreach w $words {
    # Skip blank lines, e.g. the empty element after the file's final newline.
    if {$w eq {}} continue
    set h [H $w]
    dict incr have $h
}
set coll 0;  # total words living in buckets that hold more than one word
set extra 0; # words beyond the first in each such bucket
dict for {- count} $have {
    if {$count > 1} {
        incr coll $count
        # The first word found the bucket empty; the other count-1 did not.
        incr extra [expr {$count - 1}]
    }
}
puts "found $coll collisions"
puts "found $extra words in nonempty buckets"