GitLab wurde erfolgreich aktualisiert. Durch regelmäßige Updates bleibt das THM GitLab sicher. Danke für Ihre Geduld.

Commit a2c2173f authored by Jens Plüddemann

added more lectures

parent e9974405
,Income,Purchase(Y/N)
0,59221.0,0
1,61823.0,1
2,60545.0,1
3,57752.0,0
4,53783.0,0
5,62283.0,1
6,62818.0,1
7,56215.0,0
8,64098.0,1
9,58925.0,0
10,66578.0,1
11,61505.0,1
12,59160.0,0
13,63094.0,1
14,60278.0,1
15,60323.0,1
16,60190.0,1
17,59612.0,1
18,61586.0,1
19,62027.0,1
20,60887.0,1
21,64531.0,1
22,56609.0,0
23,58841.0,0
24,62062.0,1
25,57059.0,0
26,63911.0,1
27,61782.0,1
28,59547.0,0
29,61335.0,1
30,58533.0,0
31,56295.0,0
32,62143.0,1
33,62358.0,1
34,60286.0,1
35,59945.0,1
36,54682.0,0
37,57914.0,0
38,58836.0,0
39,60584.0,1
40,63463.0,1
41,58701.0,0
42,58050.0,1
43,62389.0,1
44,59683.0,1
45,56578.0,0
46,63046.0,1
47,57856.0,0
48,58596.0,1
49,57416.0,0
50,60897.0,1
51,62684.0,1
52,59061.0,1
53,60991.0,1
54,58821.0,0
55,65842.0,1
56,63757.0,1
57,58511.0,0
58,61321.0,1
59,62350.0,1
60,61066.0,1
61,58105.0,0
62,59594.0,1
63,61817.0,1
64,61110.0,1
65,57858.0,0
66,61117.0,1
67,57463.0,0
68,54669.0,0
69,60435.0,1
70,62378.0,1
71,61105.0,1
72,63673.0,1
73,64374.0,1
74,60884.0,1
75,58392.0,1
76,59882.0,1
77,56377.0,0
78,59910.0,1
79,59773.0,0
80,60441.0,1
81,62737.0,1
82,54684.0,0
83,61879.0,1
84,58648.0,0
85,61983.0,1
86,60434.0,1
87,57411.0,0
88,62186.0,1
89,58151.0,0
90,61307.0,1
91,58520.0,0
92,58806.0,1
93,60281.0,1
94,64762.0,1
95,61735.0,1
96,59951.0,1
97,64157.0,1
98,60077.0,0
99,59256.0,0
,Year,Month,Interest_Rate,Unemployment_Rate,Stock_Index_Price
0,2017,12,2.75,5.3,1464
1,2017,11,2.5,5.3,1394
2,2017,10,2.5,5.3,1357
3,2017,9,2.5,5.3,1293
4,2017,8,2.5,5.4,1256
5,2017,7,2.5,5.6,1254
6,2017,6,2.5,5.5,1234
7,2017,5,2.25,5.5,1195
8,2017,4,2.25,5.5,1159
9,2017,3,2.25,5.6,1167
10,2017,2,2.0,5.7,1130
11,2017,1,2.0,5.9,1075
12,2016,12,2.0,6.0,1047
13,2016,11,1.75,5.9,965
14,2016,10,1.75,5.8,943
15,2016,9,1.75,6.1,958
16,2016,8,1.75,6.2,971
17,2016,7,1.75,6.1,949
18,2016,6,1.75,6.1,884
19,2016,5,1.75,6.1,866
20,2016,4,1.75,5.9,876
21,2016,3,1.75,6.2,822
22,2016,2,1.75,6.2,704
23,2016,1,1.75,6.1,719
{
"Year": [
2017,
2017,
2017,
2017,
2017,
2017,
2017,
2017,
2017,
2017,
2017,
2017,
2016,
2016,
2016,
2016,
2016,
2016,
2016,
2016,
2016,
2016,
2016,
2016
],
"Month": [
12,
11,
10,
9,
8,
7,
6,
5,
4,
3,
2,
1,
12,
11,
10,
9,
8,
7,
6,
5,
4,
3,
2,
1
],
"Interest_Rate": [
2.75,
2.5,
2.5,
2.5,
2.5,
2.5,
2.5,
2.25,
2.25,
2.25,
2,
2,
2,
1.75,
1.75,
1.75,
1.75,
1.75,
1.75,
1.75,
1.75,
1.75,
1.75,
1.75
],
"Unemployment_Rate": [
5.3,
5.3,
5.3,
5.3,
5.4,
5.6,
5.5,
5.5,
5.5,
5.6,
5.7,
5.9,
6,
5.9,
5.8,
6.1,
6.2,
6.1,
6.1,
6.1,
5.9,
6.2,
6.2,
6.1
],
"Stock_Index_Price": [
1464,
1394,
1357,
1293,
1256,
1254,
1234,
1195,
1159,
1167,
1130,
1075,
1047,
965,
943,
958,
971,
949,
884,
866,
876,
822,
704,
719
]
}
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve
class LogReg:
    """Logistic-regression demo on an "Income" vs. "Purchase(Y/N)" CSV file.

    The class loads the data once and offers methods that plot the raw data,
    plot the fitted model, print confusion-matrix statistics and plot the ROC
    curve. Every model-based method re-fits the same model (fixed
    ``random_state``), so the methods can be called independently.
    """

    def __init__(self, file_path: str):
        """Read the CSV at ``file_path`` and keep feature and target columns.

        :param file_path: path to a CSV with the columns "Income" and
            "Purchase(Y/N)"; its first column is used as the row index.
        """
        # The file already carries its index in column 0, hence index_col.
        self.df = pd.read_csv(file_path, index_col=0)
        # Short aliases for the feature and the target column.
        self.X = self.df["Income"]
        self.y = self.df["Purchase(Y/N)"]

    def _fit_model(self):
        """Scale the income, split 80/20 and fit a logistic regression.

        :return: tuple ``(scaler, regressor, X_train_scaled, X_test_scaled,
            y_train, y_test)``.
        """
        scaler = StandardScaler()
        # The scaler expects a 2-D array ([[..], [..]]), the Series is 1-D,
        # hence the reshape.
        # NOTE(review): the scaler is fitted on all rows before the split, so
        # test rows influence the scaling. Kept as in the original to leave
        # the results unchanged.
        X_scaled = scaler.fit_transform(self.X.to_numpy().reshape(-1, 1))
        X_train_scaled, X_test_scaled, y_train, y_test = train_test_split(
            X_scaled, self.y, test_size=0.2, random_state=0)
        regressor = LogisticRegression().fit(X_train_scaled, y_train)
        return scaler, regressor, X_train_scaled, X_test_scaled, y_train, y_test

    @staticmethod
    def _confusion_metrics(conf_matrix):
        """Derive the classification rates from a 2x2 confusion matrix.

        :param conf_matrix: 2-D array indexed as ``[true label, predicted
            label]``, as produced by ``confusion_matrix``.
        :return: dict with the keys "sensitivity", "specificity", "accuracy"
            and "precision".
        """
        tn, fp = conf_matrix[0, 0], conf_matrix[0, 1]
        fn, tp = conf_matrix[1, 0], conf_matrix[1, 1]
        return {
            # The parentheses matter: tp / tp + fn would evaluate to 1 + fn.
            "sensitivity": tp / (tp + fn),
            "specificity": tn / (tn + fp),
            "accuracy": (tp + tn) / (tn + fp + fn + tp),
            "precision": tp / (tp + fp),
        }

    def plot_initial(self):
        """Scatter-plot the raw income against the purchase flag."""
        plt.scatter(self.X, self.y, label="initial data")
        # cosmetics
        plt.xlabel("Income")
        plt.ylabel("Purchase Y/N")
        plt.title("Income vs Purchase")
        plt.legend()
        plt.show()

    def plot_logistic_regression(self):
        """Plot raw, training and test data plus the predicted probabilities."""
        scaler, regressor, X_train_scaled, X_test_scaled, y_train, y_test = self._fit_model()
        # Back to the original income scale for the x axis.
        X_train = scaler.inverse_transform(X_train_scaled)
        X_test = scaler.inverse_transform(X_test_scaled)
        plt.scatter(self.X, self.y, label="Initial data")
        plt.scatter(X_train, y_train, label="Training data")
        # Sort the training incomes for the probability curve. The arrays are
        # (n, 1), so the sort has to run along axis 0; the default (last) axis
        # has length 1 and would leave the order unchanged.
        X_train = np.sort(X_train, axis=0)
        X_train_scaled = np.sort(X_train_scaled, axis=0)
        y_pred_proba = regressor.predict_proba(X_train_scaled)[:, 1]
        plt.scatter(X_train, y_pred_proba, label="Logistic Regression")
        plt.scatter(X_test, y_test, label="Test data")
        # cosmetics
        plt.title("Logistic Regression")
        plt.xlabel('Income')
        plt.ylabel('Purchase(Y/N)')
        plt.legend()
        plt.show()

    def plot_confusion_matrix(self):
        """Print the test-set confusion matrix and the rates derived from it."""
        _, regressor, _, X_test_scaled, _, y_test = self._fit_model()
        y_prediction = regressor.predict(X_test_scaled)
        conf_matrix = confusion_matrix(y_test, y_prediction)
        metrics = self._confusion_metrics(conf_matrix)
        print(f"Confusion Matrix:\n{conf_matrix}")
        print(f"Sensitivität: {metrics['sensitivity']}")
        print(f"Spezifizität; {metrics['specificity']}")
        print(f"Genauigkeit; {metrics['accuracy']}")
        print(f"Genauigkeit Regressor: {regressor.score(X_test_scaled, y_test)}")
        print(f"Präzision: {metrics['precision']}")

    def plot_roc_curve(self):
        """Plot the ROC curve of the test set with its AUC in the legend."""
        _, regressor, _, X_test_scaled, _, y_test = self._fit_model()
        # Probability of the positive class; used for the curve AND the AUC so
        # that the number in the legend belongs to the plotted curve.
        positive_proba = regressor.predict_proba(X_test_scaled)[:, 1]
        logit_roc_auc = roc_auc_score(y_test, positive_proba)
        fpr, tpr, _ = roc_curve(y_test, positive_proba)
        plt.plot(fpr, tpr, label=f"Logistic Regression area={logit_roc_auc}")
        # cosmetics; the dashed diagonal is the chance line
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([-0.05, 1.0])
        plt.ylim([-0.05, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic')
        plt.legend()
        plt.show()
if __name__ == '__main__':
    # Script entry point: load the purchase data and run one demo step.
    # Re-enable the commented calls below to run the other steps.
    log_reg = LogReg('../../res/Data_LogReg.csv')
    # log_reg.plot_initial()
    # log_reg.plot_logistic_regression()
    # log_reg.plot_confusion_matrix()
    log_reg.plot_roc_curve()
\ No newline at end of file
import pandas as pd
from sklearn import linear_model
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
class StockMarket:
    """Multiple-linear-regression demo on a monthly stock-index CSV file.

    The CSV is expected to provide the columns "Year", "Month",
    "Interest_Rate", "Unemployment_Rate" and "Stock_Index_Price".
    """

    def __init__(self, file_path: str):
        """Read the CSV and merge "Year" and "Month" into one "time" column.

        :param file_path: path (or file-like object) handed to
            ``pandas.read_csv``.
        """
        raw = pd.read_csv(file_path)
        # Build the timestamp explicitly (first day of the month) instead of
        # relying on read_csv's dict-style parse_dates, which is deprecated.
        time = pd.to_datetime(pd.DataFrame({
            "year": raw["Year"],
            "month": raw["Month"],
            "day": 1,
        }))
        # As before: the two source columns are replaced by a leading "time".
        self.df = raw.drop(columns=["Year", "Month"])
        self.df.insert(0, "time", time)
        # Short aliases for the individual columns.
        self.time = self.df['time']
        self.interest_rate = self.df['Interest_Rate']
        self.unemployment_rate = self.df['Unemployment_Rate']
        self.stock_index_price = self.df['Stock_Index_Price']

    def look_at_the_data(self):
        """Print the complete data frame to the console."""
        # .to_string() prevents pandas from abbreviating the output.
        print(self.df.to_string())

    def plot_initial(self):
        """Scatter both predictors against the stock index price."""
        # Two stacked axes in one figure: unemployment on top, interest below.
        fig, ax = plt.subplots(2, 1)
        # Extra vertical space so the titles do not overlap the axes above.
        plt.subplots_adjust(hspace=.5)
        ax[0].scatter(self.unemployment_rate, self.stock_index_price, label="initial data")
        ax[1].scatter(self.interest_rate, self.stock_index_price, label="initial data")
        # cosmetics
        ax[0].set_title("Unemployment Rate vs Stock Index Price", y=1)
        ax[1].set_title("Interest Rate vs Stock Index Price", y=1)
        ax[0].set_xlabel("Unemployment Rate")
        ax[0].set_ylabel("Stock Index Price")
        ax[1].set_xlabel("Interest Rate")
        ax[1].set_ylabel("Stock Index Price")
        ax[0].grid(True)
        ax[1].grid(True)
        ax[0].legend()
        ax[1].legend()
        plt.show()

    def linear_regression(self):
        """Fit price ~ interest rate + unemployment rate and print the model."""
        features = ["Interest_Rate", "Unemployment_Rate"]
        reg = linear_model.LinearRegression()
        reg.fit(self.df[features], self.stock_index_price)
        print(f"Intercept: {reg.intercept_}")
        print(f"Coefficients: {reg.coef_}")
        # Predict from a frame with the training column names; a bare list
        # makes scikit-learn warn about missing feature names.
        sample = pd.DataFrame([[2.1, 6.0]], columns=features)
        print(f"Prediction: Interest_Rate=2.1, Unemployment_Rate=6.0, Stock_Index_Price = {reg.predict(sample)}")

    def plot_3d(self):
        """Show unemployment rate, interest rate and price as a 3-D scatter."""
        # add_subplot creates the 3-D axes; passing projection= to gca() is
        # no longer supported by current Matplotlib.
        ax = plt.figure().add_subplot(111, projection='3d')
        ax.scatter(self.unemployment_rate, self.interest_rate, self.stock_index_price)
        # cosmetics
        ax.set_title("Unemployment Rate vs Interest Rate vs Stock Index Price")
        ax.set_xlabel("Unemployment Rate")
        ax.set_ylabel("Interest Rate")
        ax.set_zlabel("Stock Index Price")
        plt.show()
if __name__ == '__main__':
    # Script entry point: load the stock-market data and run one demo step.
    # Re-enable the commented calls below to run the other steps.
    market = StockMarket('../../res/stock_market.csv')
    # market.look_at_the_data()
    # market.plot_initial()
    # market.linear_regression()
    market.plot_3d()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment