counting whales in Moby Dick

import urllib.request as ur
from io import TextIOWrapper
import string
import re      ## regular expressions
## Plain-text (UTF-8) edition of Moby Dick on Project Gutenberg.
MOBY_DICK_URL = "http://www.gutenberg.org/ebooks/2701.txt.utf-8"
## Opening words of the novel proper; every line before it is skipped.
FIRST_LINE_PREFIX = "Call me Ishmael"
## Translation table that deletes every ASCII punctuation character.
## Unlike an unescaped regex character class built from string.punctuation,
## this also removes the backslash (there, "\]" is read as an escaped "]").
PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def skip_prologue(stream, prefix=FIRST_LINE_PREFIX):
    """Return the text of *stream* from the first line starting with *prefix*.

    *stream* is any text stream offering ``readline()`` and ``read()``.
    The matching line is included in the result, followed by everything
    that comes after it.

    Raises:
        ValueError: if the stream ends before a matching line is found.
            (``readline()`` returns "" at end of file, so an unguarded
            loop would otherwise spin forever.)
    """
    ## iter(..., "") stops at end of file instead of looping on "".
    for line in iter(stream.readline, ""):
        if line.startswith(prefix):
            return line + stream.read()
    raise ValueError("no line starting with " + repr(prefix) + " was found")


def tokenize(text):
    """Return the lower-cased, punctuation-free, whitespace-split words."""
    ## remove punctuation <http://tinyurl.com/pythonpunct>
    return text.lower().translate(PUNCTUATION_TABLE).split()


def count_word(text, target="whale"):
    """Return ``(occurrences of target, total number of words)`` in *text*.

    Only exact matches of *target* are counted once the text has been
    lower-cased and stripped of punctuation, so "whales" does not count
    as "whale".
    """
    tokens = tokenize(text)
    return tokens.count(target), len(tokens)


def main(url=MOBY_DICK_URL):
    """Download the book at *url* and print the whale/word statistics."""
    ## Decode explicitly as UTF-8 rather than with the locale default, and
    ## close the HTTP response as soon as the text has been read.
    with ur.urlopen(url) as response, \
            TextIOWrapper(response, encoding="utf-8") as text_stream:
        book = skip_prologue(text_stream)
    whale_count, word_count = count_word(book)
    ## Guard the ratio so that an empty text cannot divide by zero.
    ratio = whale_count / word_count if word_count else 0.0
    print(f"whales: {whale_count}")
    print(f"words: {word_count}")
    print(f"ratio: {ratio}")


if __name__ == "__main__":
    main()

## whales: 893
## words: 211336
## ratio: 0.004225498731877201

counting whales in Moby Dick

Ben Bolker

13:52 25 February 2016