0% found this document useful (0 votes)
17 views · 3 pages

ID3 Algorithm Implementation with Tennis Data

The document provides a Python program that implements the ID3 algorithm to create a decision tree for classification. It includes functions for reading data, calculating entropy and gain ratio, and constructing the tree based on the dataset. The program concludes by printing the generated decision tree structure using a sample dataset from 'tennisdata.csv'.

Uploaded by

manishjaat321
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
17 views · 3 pages

ID3 Algorithm Implementation with Tennis Data

The document provides a Python program that implements the ID3 algorithm to create a decision tree for classification. It includes functions for reading data, calculating entropy and gain ratio, and constructing the tree based on the dataset. The program concludes by printing the generated decision tree structure using a sample dataset from 'tennisdata.csv'.

Uploaded by

manishjaat321
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

Practical – 3

Write a program to demonstrate the working of the decision-tree-based ID3 algorithm.
Use an appropriate data set to build the decision tree, and apply this knowledge to
classify a new sample.

Code

import numpy as np
import math
import csv

def read_data(filename):
    """Load a CSV training set and split it into header and data rows.

    Args:
        filename: Path of a comma-separated file whose first row holds the
            column names.

    Returns:
        A ``(metadata, traindata)`` tuple. ``metadata`` is the list of
        column names taken from the first row; ``traindata`` is the list of
        the remaining non-blank rows, each a list of strings. Both lists
        are empty when the file is empty.
    """
    # newline="" leaves newline handling to the csv module, as its
    # documentation recommends for file objects passed to csv.reader.
    with open(filename, "r", newline="") as csvfile:
        datareader = csv.reader(csvfile, delimiter=",")
        # The default keeps StopIteration from escaping on an empty file.
        metadata = list(next(datareader, []))
        # csv.reader yields [] for blank lines; dropping them keeps every
        # row the same width when the rows are later stacked into an array.
        traindata = [row for row in datareader if row]

    return (metadata, traindata)

class Node:
    """A node of the decision tree.

    An internal node names the attribute it splits on and keeps one
    ``(value, child)`` pair per attribute value in ``children``. A leaf is
    created with an empty attribute and has ``answer`` set to the predicted
    class label.
    """

    def __init__(self, attribute):
        """Create a node that splits on ``attribute`` and has no children."""
        self.attribute = attribute
        # (attribute value, child Node) pairs, appended by the tree builder.
        self.children = []
        # Stays "" for internal nodes; leaves overwrite it with a label.
        self.answer = ""

    def __str__(self):
        """Return the name of the attribute this node splits on."""
        return self.attribute

def subtables(data, col, delete):
    """Partition the rows of ``data`` by the distinct values in column ``col``.

    Args:
        data: 2-D NumPy array with one row per sample.
        col: Index of the column whose values define the partition.
        delete: When true, column ``col`` is removed from every sub-table.

    Returns:
        A tuple ``(items, tables)``. ``items`` is the sorted array of unique
        values found in column ``col`` (as returned by ``np.unique``) and
        ``tables`` maps each of those values to the 2-D array of rows that
        carry it, in their original order.
    """
    items = np.unique(data[:, col])
    tables = {}

    for item in items:
        # Boolean-mask selection copies the matching rows and keeps the
        # dtype of ``data``, so values are neither converted to bytes nor
        # truncated to a fixed width.
        subset = data[data[:, col] == item]
        if delete:
            subset = np.delete(subset, col, axis=1)
        tables[item] = subset

    return items, tables

def entropy(S):
    """Return the Shannon entropy, in bits, of the labels in ``S``.

    Args:
        S: Array-like of class labels.

    Returns:
        A float: ``-sum(p * log2(p))`` over the relative frequency ``p`` of
        each distinct label. The result is 0.0 when ``S`` is empty or holds
        a single distinct label.
    """
    labels = np.asarray(S)
    _, counts = np.unique(labels, return_counts=True)

    # Zero or one distinct label carries no uncertainty.
    if counts.size <= 1:
        return 0.0

    probabilities = counts / labels.size
    # Every probability is > 0 here because np.unique only reports labels
    # that actually occur, so log2 is always defined.
    return float(-np.sum(probabilities * np.log2(probabilities)))

def gain_ratio(data, col):
    """Return the gain ratio obtained by splitting ``data`` on column ``col``.

    The gain ratio is the information gain of the split divided by its
    intrinsic value (the entropy of the split proportions themselves).

    Args:
        data: 2-D NumPy array whose last column holds the class labels.
        col: Index of the attribute column to evaluate.

    Returns:
        The gain ratio as a float. It is 0.0 when the intrinsic value is
        zero, i.e. when the column has at most one distinct value and so
        cannot separate the samples.
    """
    items, tables = subtables(data, col, delete=False)

    total_size = data.shape[0]
    conditional_entropy = 0.0
    intrinsic_value = 0.0

    for item in items:
        subset = tables[item]
        ratio = subset.shape[0] / total_size
        # np.squeeze collapses a 1-element array result to a scalar so the
        # accumulation stays in plain floats.
        conditional_entropy += ratio * float(np.squeeze(entropy(subset[:, -1])))
        intrinsic_value -= ratio * math.log(ratio, 2)

    information_gain = float(np.squeeze(entropy(data[:, -1]))) - conditional_entropy

    # Guard against 0/0: a single-valued column yields zero gain and zero
    # intrinsic value.
    if intrinsic_value == 0:
        return 0.0

    return information_gain / intrinsic_value

def create_node(data, metadata):
    """Recursively build a decision tree for ``data``.

    Args:
        data: 2-D NumPy array; every column but the last is an attribute and
            the last column holds the class labels.
        metadata: Sequence of column names aligned with the columns of
            ``data``.

    Returns:
        The root ``Node`` of the (sub)tree. A leaf has an empty attribute
        and its ``answer`` set to a class label; an internal node names the
        attribute with the highest gain ratio and has one child per value
        of that attribute.
    """
    classes, counts = np.unique(data[:, -1], return_counts=True)
    n_attributes = data.shape[1] - 1

    # Stop when the labels are pure, or when no attribute is left to split
    # on; in the latter case fall back to the most frequent label instead
    # of calling argmax on an empty list of gains.
    if classes.shape[0] == 1 or n_attributes == 0:
        node = Node("")
        node.answer = classes[np.argmax(counts)]
        return node

    gains = np.array(
        [float(np.squeeze(gain_ratio(data, col))) for col in range(n_attributes)]
    )
    # np.argmax treats NaN as the maximum, so a NaN ratio (0/0 for a
    # single-valued attribute) must not be allowed to win the selection.
    gains = np.nan_to_num(gains)

    split = int(np.argmax(gains))

    node = Node(metadata[split])
    # The chosen column is dropped from the sub-tables below, so drop its
    # name as well to keep names and columns aligned.
    remaining = np.delete(metadata, split, 0)

    items, tables = subtables(data, split, delete=True)

    for item in items:
        child = create_node(tables[item], remaining)
        node.children.append((item, child))

    return node

def empty(size):
    """Return an indentation string made of ``size`` spaces.

    A ``size`` of zero or less gives the empty string.
    """
    # String repetition replaces the character-by-character concatenation.
    return " " * size

def print_tree(node, level):
    """Print the tree rooted at ``node`` to standard output.

    Args:
        node: The ``Node`` to print.
        level: Indentation level for ``node``; each attribute value is
            printed one level deeper and its subtree two levels deeper.
    """
    # A non-empty answer marks a leaf: print the label and stop.
    if node.answer != "":
        print(empty(level), node.answer)
        return

    print(empty(level), node.attribute)
    for value, child in node.children:
        print(empty(level + 1), value)
        print_tree(child, level + 2)

# Script entry: load the training set, build the tree and print it.
# NOTE(review): the file name was lost in extraction; "tennisdata.csv" is
# taken from the accompanying description of the program, so confirm it.
metadata, traindata = read_data("tennisdata.csv")

# Stack the rows into a 2-D array so columns can be sliced by index.
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)

You might also like