Running a Classification Tree
I decided to select the following variables as predictors:
'HISPANIC','WHITE','BLACK','NAMERICAN','ASIAN','age'
in order to try to predict whether a respondent is a regular smoker (the TREG1 variable).
Beforehand, I installed all the required libraries via the conda GUI.
[code]
# -*- coding: utf-8 -*-
"""
Created on Sun JAN 09 21:12:54 2023

@author: malarcono
"""

import os

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import sklearn.metrics
from pandas import Series, DataFrame
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Switch to the working folder so relative file names resolve there.
# A raw string keeps the backslash literal: "\T" is not a valid escape
# sequence and triggers a warning on current Python versions.
os.chdir(r"C:\TREES")
""" Data Engineering and Analysis """
# Load the dataset
AH_data = pd.read_csv("tree_addhealth.csv")

# Drop every row that contains at least one missing value.
data_clean = AH_data.dropna()

# Show column types and summary statistics. They are printed explicitly
# because a bare expression displays nothing when run as a script.
print(data_clean.dtypes)
print(data_clean.describe())
""" Modeling and Prediction """
# Split into training and testing sets
predictors = data_clean[['HISPANIC', 'WHITE', 'BLACK', 'NAMERICAN', 'ASIAN', 'age']]
targets = data_clean.TREG1

# Hold out 40% of the rows for testing.
# NOTE: no random_state is passed, so the split (and therefore the fitted
# tree and its scores) will differ from run to run.
pred_train, pred_test, tar_train, tar_test = train_test_split(
    predictors, targets, test_size=.4
)

# Report the size of each partition.
print(pred_train.shape)
print(pred_test.shape)
print(tar_train.shape)
print(tar_test.shape)
# Build model on training data
classifier = DecisionTreeClassifier()
classifier = classifier.fit(pred_train, tar_train)

# Predict the target for the held-out rows.
predictions = classifier.predict(pred_test)

# Evaluate against the true test labels. The results are printed because
# a bare expression's value is discarded when run as a script.
print(sklearn.metrics.confusion_matrix(tar_test, predictions))
print(sklearn.metrics.accuracy_score(tar_test, predictions))
from sklearn import tree
from io import StringIO
from IPython.display import Image
import pydotplus

# Write the fitted tree as Graphviz DOT text into an in-memory buffer.
out = StringIO()
tree.export_graphviz(classifier, out_file=out)

# Parse the DOT text, then write the PNG as a separate step so that
# `graph` holds the graph object rather than write_png's return value.
graph = pydotplus.graph_from_dot_data(out.getvalue())
graph.write_png('tree_dec.png')
The resulting decision tree graphic is shown below.











