100 Days of Machine Learning Algorithms - Day 3: Multiple Linear Regression

Multiple linear regression attempts to model the relationship between two or more features and a response by fitting a linear equation to the observed data.

# Modified from code in 100-Days-of-ML-Code
# Day3_Multiple_Linear_Regression

# importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Print (nearly) full arrays on wide lines so the intermediate matrices
# printed below are readable.
np.set_printoptions(edgeitems=50, linewidth=500)

# Step 1: data preprocessing
# importing dataset: every column except the last is a feature, the last
# column is the response (same split point for X and Y).
dataset = pd.read_csv('50_Startups.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1].values

# encoding categorical data
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Column 3 is treated as the categorical feature (presumably the state
# column of 50_Startups.csv -- confirm against the data file).
labelencoder = LabelEncoder()
X[:, 3] = labelencoder.fit_transform(X[:, 3])
print('X after label encoder \n ', X)

# One-hot encode ONLY the categorical column. A bare OneHotEncoder() fitted
# on the whole matrix would also expand every numeric column into one
# indicator per distinct value, destroying the numeric features.
# The ColumnTransformer places the dummy columns first and passes the
# remaining (numeric) columns through unchanged after them.
onehotencoder = OneHotEncoder()
column_transformer = ColumnTransformer(
    [('onehot', onehotencoder, [3])],
    remainder='passthrough',
    sparse_threshold=0,  # always return a dense array
)
# X was an object array (mixed text/numbers); make it numeric explicitly.
X = column_transformer.fit_transform(X).astype(float)

# avoiding dummy variable trap: drop the first dummy column, which is
# fully determined by the remaining dummy columns.
print('X after onehot:\n ', X)
X = X[:, 1:]
print('X after avoiding dummy trap \n', X)

# splitting the dataset into the training sets and test sets
# (80% train / 20% test, fixed seed for reproducible splits)
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Step 2: fitting multiple linear regression to the training sets
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, Y_train)

# Step 3: predicting the test sets results
Y_pred = regressor.predict(X_test)
print('Y_pred \n', Y_pred)

 

Published 12 original articles · won praise 5 · Views 7454

Guess you like

Origin blog.csdn.net/redredxcar/article/details/104100857