4.1 Getting and Setting Values in Dictionaries
#Write a function that "moves" an entry from one key to another.
def moveKey(d, old, new):
    """Move the entry stored under key ``old`` so it is stored under ``new``.

    The dictionary is modified in place and nothing is returned. If ``new``
    is already a key, its previous value is overwritten.

    Args:
        d: the dictionary to modify.
        old: the key whose value should be moved.
        new: the key the value should end up under.

    Raises:
        KeyError: if ``old`` is not a key of ``d``. ``d`` is left untouched.
    """
    value = d[old]  #This will throw an error if old is not a key
    # Assign before deleting so that a failure here (e.g. an unhashable
    # ``new``) cannot lose the entry.
    d[new] = value
    # When both names refer to the same key, the assignment above already
    # left the entry in place; deleting it would throw the value away.
    if not (new is old or new == old):
        del d[old]
4.2 ML Reader Version 2
def stripComment(s):
    """Return s cut off just before its first '//' marker.

    A string that contains no '//' is returned unchanged. Everything from
    the marker onwards is dropped, including any trailing newline.
    """
    # partition() yields the whole string as its first item when the
    # separator is absent, so no separate membership test is needed.
    beforeMarker, _marker, _commentText = s.partition('//')
    return beforeMarker
def _nextDataLine(f):
    """Return the next line of f that carries data, minus any // comment.

    Lines that are empty, whitespace-only or comment-only are skipped.

    Raises:
        ValueError: if the end of the file is reached first.
    """
    while True:
        raw = f.readline()
        if raw == '':
            # readline() returns '' only at end of file (a blank line is
            # '\n'). Treating this as "blank, keep going" would loop forever.
            raise ValueError("unexpected end of file while reading ML data")
        line = stripComment(raw)
        if line.strip():
            return line


def readML2(filename, labelCount=2):
    """Read an ML data file and return [features, featuresInOrder, data].

    The file is read as a sequence of data lines; blank lines and //
    comments are ignored. In order, the data lines are:

      1. the number of features;
      2. one line per feature: the first token is the feature name and the
         tokens from the third onwards are its options (the second token is
         skipped -- presumably an option count; confirm against the format);
      3. ``labelCount`` lines, one class label each;
      4. the number of examples;
      5. one line per example: example code, class, then one value per
         feature in the order the features were declared.

    The parsed features, labels and data are also printed, as before.

    Args:
        filename: path of the file to read.
        labelCount: how many class-label lines the file holds (default 2).

    Returns:
        A list ``[features, featuresInOrder, data]`` where ``features`` maps
        feature name -> list of options, ``featuresInOrder`` lists the
        feature names in file order, and ``data`` maps example code -> a
        dictionary holding ``'class'`` plus one entry per feature name.

    Raises:
        IOError: if the file cannot be opened.
        ValueError: if a count is not an integer or the file ends early.
        IndexError: if a line has too few tokens, or an example has more
            values than there are features.
    """
    # "with" guarantees the file is closed even if parsing fails part-way.
    with open(filename) as f:
        #Get the feature count
        featureCount = int(_nextDataLine(f))

        #Make a dictionary of features
        #Also store an in-order list of the features
        featuresInOrder = []
        features = {}
        for _ in range(featureCount):
            thisFeature = _nextDataLine(f).split()
            #The key is the feature name
            #The value is a list of the options.
            features[thisFeature[0]] = thisFeature[2:]
            featuresInOrder.append(thisFeature[0])

        #Get the class labels
        # strip() rather than [:-1]: once a comment has been removed (or on
        # a final line with no newline) the last character is real data.
        labels = [_nextDataLine(f).strip() for _ in range(labelCount)]

        #Get the example count
        exampleCount = int(_nextDataLine(f))

        #Get the examples
        #Store a dictionary of dictionaries, where each example code points
        #to a dictionary where the keys are feature names and the values are
        #the feature values for this example.
        data = {}
        for _ in range(exampleCount):
            rawDatum = _nextDataLine(f).split()
            datum = {'class': rawDatum[1]}
            for position, value in enumerate(rawDatum[2:]):
                datum[featuresInOrder[position]] = value
            data[rawDatum[0]] = datum

    #For testing purposes
    print(features)
    print(labels)
    print(data)
    return [features, featuresInOrder, data]
4.3 Useful Dictionary Functions
#Write a script to find the smallest not-present combination of features.
#Note: "crew" + "child" is (unsurprisingly) not present. That should be
#the answer we come up with, or at least something no larger.
#For convenience, we'll reuse readML2() and modify it so that it returns
#a list containing [features,featuresInOrder,data]
filename = 'titanic_fatalities.data'
#Should test whether the script works if there is a missing singleton.
#filename='dice_game.data'
features, featuresInOrder, data = readML2(filename)

#Check whether any single feature value is missing.
#featuresFound maps feature name -> {option: marker}. Each marker starts
#out as the option's own name and is overwritten with True once some
#example is seen using that option.
featuresFound = {}
for featureName, options in features.items():
    featuresFound[featureName] = {option: option for option in options}

#Print statements below were all used for debugging purposes.
print(featuresFound)
print(featuresInOrder)

#Mark every (feature, value) pair that occurs in at least one example.
for example in data.values():
    for featureName in featuresInOrder:
        featuresFound[featureName][example[featureName]] = True
print(featuresFound)

#Any marker that is still an option name was never seen in the data.
foundMissingSingle = False
for featureName, markers in featuresFound.items():
    for marker in markers.values():
        if marker != True:
            print("%s %s %s %s" % ("Missing single feature:", featureName, "=", marker))
            foundMissingSingle = True
if not foundMissingSingle:
    print("All single feature values present.")
#At this point, we can use our intuition to report missing "crew + child".
4.6 Try and Except Syntax
#Write a short program that divides two user-provided numbers.
#The program should be stable and not contain any if statements.
def stableDivide():
    """Ask the user for two integers and print the first divided by the second.

    Bad input is reported with a message instead of a traceback: text that
    is not an integer, or a zero divisor. No if statements are used; the
    exception handlers do all the branching.
    """
    try:
        numerator = int(raw_input("First: "))
        denominator = int(raw_input("Second: "))
        # Both operands are ints, so this is integer (floor) division.
        print(numerator // denominator)
    except ValueError:
        print("Both inputs must be numbers.")
    except ZeroDivisionError:
        print("Cannot divide by zero.")
#Write a short program that gets a filename from the user and
#prints the first line of the file.
def printFirstLine():
    """Ask the user for a filename and print the first line of that file.

    If the file cannot be opened or read, a short message is printed
    instead of letting the IOError propagate.
    """
    filename = raw_input("Name of file: ")
    try:
        # "with" closes the file as soon as the line has been read; the
        # handle used to be left open.
        with open(filename) as f:
            print(f.readline())
    except IOError:
        print("There was a problem.")
#As it happens, "file does not exist" and "lack of permission" are
#both IOErrors. We don't quite yet have the tools to tell them apart.
4.7 Causing Exceptions
#Write an analog of the index() method for dictionaries.
def myIndex(d, v):
    """Return a key of d whose value equals v.

    When several keys hold v, the first one met while iterating over d is
    returned. Raises ValueError if no value in d equals v.
    """
    for candidateKey, storedValue in d.items():
        if storedValue == v:
            return candidateKey
    raise ValueError(str(v) + " not in dictionary")
4.8 Following Try/Except
#Write a function for safely reading a list of files into
#your ML Reader.
def safeReadFiles(filenames):
    """Pass each file in filenames to readML2, skipping any that won't open.

    Each name is first opened as a probe. A name that open() rejects with
    a TypeError, or a file that fails to open with an IOError, is reported
    with a message and skipped; the remaining names are still processed.
    """
    for path in filenames:
        try:
            probe = open(path)
        except TypeError:
            print("%s %s" % (path, "is not a valid filename."))
            continue
        except IOError:
            print("%s %s" % ("Failed to open file at", path))
            continue
        #readML2 opens the file itself, can't have two copies open
        probe.close()
        print("%s %s" % ("Successfully opened", path))
        readML2(path)