GitLab wurde erfolgreich aktualisiert. Durch regelmäßige Updates bleibt das THM GitLab sicher. Danke für Ihre Geduld.

Commit b8a6c777 authored by Jens Plüddemann

added salary data

parent 9461fe74
......@@ -3,6 +3,7 @@ import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from statsmodels.formula.api import ols
import seaborn as sns
class SalaryData:
......@@ -31,7 +32,8 @@ class SalaryData:
print(f'Intercept = {b0}')
def linear_regression_sklearn(self):
    """Fit a scikit-learn linear regression of salary on years and print it.

    Reads ``self.years`` and ``self.salary`` (both must provide
    ``to_numpy()``), fits ``LinearRegression`` on them and prints the
    fitted slope and intercept to stdout. Returns nothing.
    """
    # scikit-learn wants 2-D input: one row per sample, one column each.
    features = self.years.to_numpy().reshape(-1, 1)
    targets = self.salary.to_numpy().reshape(-1, 1)

    # Fit exactly once. The earlier text fitted the identical model twice in
    # a row and threw the first result away.
    regressed_data = LinearRegression().fit(features, targets)

    # The target was passed as a column, so coef_ is indexed twice and
    # intercept_ once to reach the scalar values.
    print(f'Slope = {regressed_data.coef_[0][0]}')
    print(f'Intercept = {regressed_data.intercept_[0]}')
......@@ -42,11 +44,59 @@ class SalaryData:
print(f'Slope = {regressed_data[1]}')
print(f'Intercept = {regressed_data[0]}')
def plot_regression(self):
    """Show the raw data together with both fitted regression lines.

    Fits one model with scikit-learn and one with the statsmodels formula
    API, then draws the observations as a scatter plot and each model's
    predictions as a line in a single matplotlib figure.
    """
    # Column-shaped copies for scikit-learn.
    years_column = self.years.to_numpy().reshape(-1, 1)
    salary_column = self.salary.to_numpy().reshape(-1, 1)

    sklearn_model = LinearRegression().fit(years_column, salary_column)
    statsmodels_fit = ols('Salary ~ YearsExperience', data=self.df).fit()

    sklearn_line = sklearn_model.predict(years_column)
    statsmodels_line = statsmodels_fit.predict(self.years)

    plt.scatter(self.years, self.salary, label='initial data')
    plt.plot(self.years, sklearn_line, label='regression sklearn')
    plt.plot(self.years, statsmodels_line, label='regression statsmodels')

    plt.title('Salary vs experience')
    plt.xlabel('Years of experience')
    plt.ylabel('Salary')
    plt.legend()
    plt.show()
def evaluate_model(self):
    """Print fit-quality metrics for the sklearn model and plot residuals.

    Fits ``LinearRegression`` on ``self.years`` / ``self.salary``, prints
    the RMSE, a hand-computed R-squared and scikit-learn's own R-squared,
    then shows a seaborn residual plot built from ``self.df``.
    Returns nothing.
    """
    # scikit-learn wants 2-D input: one row per sample, one column each.
    features = self.years.to_numpy().reshape(-1, 1)
    targets = self.salary.to_numpy().reshape(-1, 1)
    reg_sl = LinearRegression().fit(features, targets)

    # Predict once and reuse the residuals for both RMSE and R-squared.
    residuals = reg_sl.predict(features) - targets
    ssres = np.sum(residuals ** 2)
    rmse = np.sqrt(ssres / len(self.salary))

    # Total sum of squares around the mean salary.
    sstot = np.sum((self.salary - np.mean(self.salary)) ** 2)
    r_squared = 1 - (ssres / sstot)

    print(f'RMSE: {rmse}')
    print(f'R-squared: {r_squared}')
    print(f'R-squared sklearn: {reg_sl.score(features, targets)}')

    sns.residplot(x='YearsExperience', y='Salary', data=self.df)
    # The horizontal axis carries the x variable passed above (years of
    # experience), not the fitted salaries, so label it as such.
    plt.xlabel('Years of experience')
    plt.ylabel('Residuals')
    plt.show()
def predict_the_result(self):
    """Print salary predictions for a fixed set of experience values.

    Fits ``LinearRegression`` on ``self.years`` / ``self.salary`` and
    prints the predictions for 3.0, 2.5, 10.0 and 20.0 years.
    """
    model = LinearRegression()
    model.fit(
        self.years.to_numpy().reshape(-1, 1),
        self.salary.to_numpy().reshape(-1, 1),
    )

    # One row per query, one column for the single feature.
    experience_queries = np.array([[3.0], [2.5], [10.0], [20.0]])
    predicted = model.predict(experience_queries)
    print(f'Predictions: {predicted}')
# Script entry point: builds a SalaryData from the CSV path below and calls
# its analysis methods in sequence.
# NOTE(review): this span looks like rendered diff output whose indentation
# and +/- markers were stripped. The linear_regression_sklearn() and
# linear_regression_statsmodels() calls appear both live and commented out;
# confirm which variant is current before running.
if __name__ == '__main__':
data_set = SalaryData('../../res/Salary_Data.csv')
# data_set.plot_initial()
# data_set.pearson_correlation_coefficient()
# data_set.linear_regression()
data_set.linear_regression_sklearn()
data_set.linear_regression_statsmodels()
# data_set.linear_regression_sklearn()
# data_set.linear_regression_statsmodels()
# data_set.plot_regression()
data_set.evaluate_model()
data_set.predict_the_result()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment