# Practical – 3
# Write a program to demonstrate the working of the decision-tree-based ID3 algorithm.
# Use an appropriate data set for building the decision tree and apply this knowledge to
# classify a new sample.
# Code
import numpy as np
import math
import csv
def read_data(filename):
    """Load a CSV data set whose first row holds the column names.

    Args:
        filename: Path of the comma-separated file to read.

    Returns:
        A ``(metadata, traindata)`` tuple: ``metadata`` is the list of header
        names and ``traindata`` is the list of the remaining rows, each a
        list of strings.

    Raises:
        StopIteration: If the file is empty, i.e. has no header row.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        metadata = list(next(datareader))
        traindata = list(datareader)
    return (metadata, traindata)
class Node:
    """One node of the decision tree: an attribute split or a leaf."""

    # NOTE(review): the three attribute names were unreadable placeholders in
    # the pasted source; they are reconstructed from how create_node and
    # print_tree use a Node -- confirm against the original listing.
    def __init__(self, attribute):
        # Name of the attribute this node splits on ("" for a leaf).
        self.attribute = attribute
        # (attribute value, child Node) pairs, one per branch.
        self.children = []
        # Class label of a leaf; stays "" for an internal node.
        self.answer = ""

    def __str__(self):
        return self.attribute
def subtables(data, col, delete):
    """Partition the rows of ``data`` by the distinct values in column ``col``.

    Args:
        data: 2-D NumPy array with one row per sample.
        col: Index of the column to partition on.
        delete: When true, column ``col`` is removed from every sub-table.

    Returns:
        An ``(items, tables)`` tuple: ``items`` is the sorted array of
        distinct values found in column ``col`` and ``tables`` maps each of
        those values to the array of rows that carry it.
    """
    items = np.unique(data[:, col])
    tables = {}
    for value in items:
        # Boolean-mask selection keeps the dtype of ``data``. Copying rows
        # into a fixed "|S32" buffer would turn text into byte strings and
        # silently truncate values longer than 32 bytes.
        rows = data[data[:, col] == value]
        if delete:
            rows = np.delete(rows, col, 1)
        tables[value] = rows
    return items, tables
def entropy(S):
    """Return the Shannon entropy, in bits, of the values in ``S``.

    Args:
        S: NumPy array of class labels.

    Returns:
        The entropy as a plain float; 0.0 when ``S`` is empty or contains a
        single distinct value.
    """
    _, counts = np.unique(S, return_counts=True)
    if counts.size <= 1:
        return 0.0
    probabilities = counts / S.size
    # Every probability is > 0 here, so log2 is always defined.
    return float(-np.sum(probabilities * np.log2(probabilities)))
def gain_ratio(data, col):
    """Return the gain ratio obtained by splitting ``data`` on column ``col``.

    The gain ratio is the information gain of the split (entropy of the
    label column minus the size-weighted entropy of each partition) divided
    by the split's intrinsic value.

    Args:
        data: 2-D NumPy array whose last column holds the class label.
        col: Index of the attribute column to evaluate.

    Returns:
        The gain ratio, or 0.0 when the intrinsic value is zero (the column
        holds at most one distinct value, so splitting on it is useless).
    """
    items, tables = subtables(data, col, delete=False)
    total_size = data.shape[0]

    weighted_entropy = 0.0
    intrinsic_value = 0.0
    for value in items:
        subset = tables[value]
        ratio = subset.shape[0] / total_size
        weighted_entropy += ratio * entropy(subset[:, -1])
        intrinsic_value -= ratio * math.log(ratio, 2)

    # Without this guard a single-valued column yields 0/0 = NaN, and
    # np.argmax treats NaN as the maximum, so it would win the split.
    if intrinsic_value == 0:
        return 0.0
    return (entropy(data[:, -1]) - weighted_entropy) / intrinsic_value
def create_node(data, metadata):
    """Recursively build the decision (sub)tree for ``data``.

    Args:
        data: 2-D NumPy array; the last column is the class label and every
            other column is an attribute.
        metadata: Sequence of column names; ``metadata[i]`` names attribute
            column ``i`` of ``data``.

    Returns:
        The root ``Node`` of the subtree. A leaf carries its label in
        ``answer``; an internal node carries the chosen attribute name and
        one ``(value, child)`` pair per distinct value of that attribute.

    Raises:
        ValueError: If ``data`` has no rows.
    """
    if data.shape[0] == 0:
        raise ValueError("cannot build a decision tree from an empty data set")

    labels, label_counts = np.unique(data[:, -1], return_counts=True)
    attribute_count = data.shape[1] - 1

    # Leaf when the labels are pure, or when no attribute is left to split on
    # (identical samples with conflicting labels): answer with the most
    # frequent label instead of calling argmax on an empty gain list.
    if labels.shape[0] == 1 or attribute_count == 0:
        node = Node("")
        node.answer = labels[np.argmax(label_counts)]
        return node

    gains = [gain_ratio(data, col) for col in range(attribute_count)]
    split = int(np.argmax(gains))

    node = Node(metadata[split])
    # Children see the data without the split column, so drop its name too.
    remaining = [name for index, name in enumerate(metadata) if index != split]

    items, tables = subtables(data, split, delete=True)
    for value in items:
        child = create_node(tables[value], remaining)
        node.children.append((value, child))
    return node
def empty(size):
    """Return a string of ``size`` spaces, used as print indentation.

    A ``size`` of zero or less yields the empty string.
    """
    return " " * size
def print_tree(node, level):
    """Print the subtree rooted at ``node`` to standard output.

    A leaf prints its answer. An internal node prints its attribute name,
    then each branch value one level deeper, followed by that branch's
    subtree two levels deeper.

    Args:
        node: The ``Node`` to print.
        level: Indentation width, passed to ``empty``, for this node's line.
    """
    # A non-empty answer marks a leaf.
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, child in node.children:
        print(empty(level + 1), value)
        print_tree(child, level + 2)
# Build the tree from the training CSV and print it.
# NOTE(review): "[Link]" is a placeholder left by the text extraction in place
# of the real data-file name; replace it with the path of the training CSV
# (header row first, class label in the last column) before running.
metadata, traindata = read_data("[Link]")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)