import urllib.request as ur
from io import TextIOWrapper
import string
import re ## regular expressions
## Plain-text UTF-8 edition of the book on Project Gutenberg (ebook #2701).
BOOK_URL = "http://www.gutenberg.org/ebooks/2701.txt.utf-8"
## Opening words of the first line we want to keep; everything before the
## line that starts with this text is treated as prologue and skipped.
START_MARKER = "Call me Ishmael"
## Matches any single ASCII punctuation character
## <http://tinyurl.com/pythonpunct>.  re.escape keeps ']', '\', '^' and '-'
## literal inside the brackets, so every punctuation character is matched.
PUNCT_RE = re.compile("[" + re.escape(string.punctuation) + "]")


def read_from_marker(lines, marker=START_MARKER):
    """Return the text starting at the first line that begins with *marker*.

    *lines* is any iterable of text lines (an open text stream, a list of
    strings, ...).  The matching line and every line after it are
    concatenated and returned as one string.

    Raises ValueError if no line starts with *marker*, instead of looping
    forever once the input is exhausted.
    """
    stream = iter(lines)
    for line in stream:
        if line.startswith(marker):
            ## the iterator now holds exactly the lines after the match
            return line + "".join(stream)
    raise ValueError("start marker not found: " + repr(marker))


def split_words(text):
    """Lower-case *text*, delete ASCII punctuation, split on whitespace.

    Returns the resulting list of words (empty list for empty input).
    """
    return PUNCT_RE.sub("", text.lower()).split()


def main():
    """Download the book and print how often "whale" occurs in it."""
    ## Both objects are context managers, so the connection is released
    ## even if decoding or the marker search fails.  The URL serves UTF-8,
    ## so decode explicitly rather than relying on the locale default.
    with ur.urlopen(BOOK_URL) as response, \
            TextIOWrapper(response, encoding="utf-8") as reader:
        text = read_from_marker(reader)
    words = split_words(text)
    w1 = words.count("whale")
    w2 = len(words)
    print("whales: "+str(w1))
    print("words: "+str(w2))
    print("ratio: "+str(w1/w2))


if __name__ == "__main__":
    main()
## whales: 893
## words: 211336
## ratio: 0.004225498731877201