GitLab wurde erfolgreich aktualisiert. Durch regelmäßige Updates bleibt das THM GitLab sicher. Danke für Ihre Geduld.

Commit b82869c7 authored by Jens Plüddemann

added first lecture

parents
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
# Default ignored files
/workspace.xml
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Pipenv (predictive-analytics)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/predictive-analytics.iml" filepath="$PROJECT_DIR$/.idea/predictive-analytics.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true
[dev-packages]
[packages]
numpy = "*"
matplotlib = "*"
pandas = "*"
scikit-learn = "*"
keras = "*"
tensorflow = "*"
statsmodels = "*"
xlrd = "*"
[requires]
python_version = "3.7"
This diff is collapsed.
Month;Passengers
1949-01;224
1949-02;236
1949-03;264
1949-04;258
1949-05;242
1949-06;270
1949-07;296
1949-08;296
1949-09;272
1949-10;238
1949-11;208
1949-12;236
1950-01;230
1950-02;252
1950-03;282
1950-04;270
1950-05;250
1950-06;298
1950-07;340
1950-08;340
1950-09;316
1950-10;266
1950-11;228
1950-12;280
1951-01;290
1951-02;300
1951-03;356
1951-04;326
1951-05;344
1951-06;356
1951-07;398
1951-08;398
1951-09;368
1951-10;324
1951-11;292
1951-12;332
1952-01;342
1952-02;360
1952-03;386
1952-04;362
1952-05;366
1952-06;436
1952-07;460
1952-08;484
1952-09;418
1952-10;382
1952-11;344
1952-12;388
1953-01;392
1953-02;392
1953-03;472
1953-04;470
1953-05;458
1953-06;486
1953-07;528
1953-08;544
1953-09;474
1953-10;422
1953-11;360
1953-12;402
1954-01;408
1954-02;376
1954-03;470
1954-04;454
1954-05;468
1954-06;528
1954-07;604
1954-08;586
1954-09;518
1954-10;458
1954-11;406
1954-12;458
1955-01;484
1955-02;466
1955-03;534
1955-04;538
1955-05;540
1955-06;630
1955-07;728
1955-08;694
1955-09;624
1955-10;548
1955-11;474
1955-12;556
1956-01;568
1956-02;554
1956-03;634
1956-04;626
1956-05;636
1956-06;748
1956-07;826
1956-08;810
1956-09;710
1956-10;612
1956-11;542
1956-12;612
1957-01;630
1957-02;602
1957-03;712
1957-04;696
1957-05;710
1957-06;844
1957-07;930
1957-08;934
1957-09;808
1957-10;694
1957-11;610
1957-12;672
1958-01;680
1958-02;636
1958-03;724
1958-04;696
1958-05;726
1958-06;870
1958-07;982
1958-08;1010
1958-09;808
1958-10;718
1958-11;620
1958-12;674
1959-01;720
1959-02;684
1959-03;812
1959-04;792
1959-05;840
1959-06;944
1959-07;1096
1959-08;1118
1959-09;926
1959-10;814
1959-11;724
1959-12;810
1960-01;834
1960-02;782
1960-03;838
1960-04;922
1960-05;944
1960-06;1070
1960-07;1244
1960-08;1212
1960-09;1016
1960-10;922
1960-11;780
1960-12;864
\ No newline at end of file
import math
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
def main():
    """Print the placeholder greeting of this script."""
    print('Hi')


# Only greet when the file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.plotting import register_matplotlib_converters, lag_plot, autocorrelation_plot
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.arima_model import ARIMA
# Register the pandas formatters and converters with matplotlib at import
# time, before any of the plots below are drawn.
register_matplotlib_converters()
class AirlinePassengers:
    """Exploratory time-series plots for a monthly passenger data set.

    The data is read from an Excel file that provides a ``Month`` column
    (converted to datetimes) and a ``Passengers`` column. Every ``plot_*``
    method draws one matplotlib figure and displays it with ``plt.show()``.
    """

    def __init__(self, file_path: str):
        """Load the data set.

        Args:
            file_path: Path of the Excel file passed to ``pd.read_excel``.
        """
        self.df = pd.read_excel(file_path)
        self.df['Month'] = pd.to_datetime(self.df['Month'])
        self.time = self.df['Month']
        self.passengers = self.df['Passengers']

    # ------------------------------------------------------------------
    # Pure numeric helpers (no plotting)
    # ------------------------------------------------------------------
    @staticmethod
    def _centered_moving_average(values, window: int):
        """Return the centred moving average of *values*, NaN-padded.

        The average at position ``i`` covers ``floor(window / 2)`` values on
        each side of ``i``. The sum is divided by the number of values that
        were actually summed, so even windows are averaged correctly too.

        Args:
            values: Sequence of numbers.
            window: Requested window length (>= 1).

        Returns:
            A float array with the same length as *values*; positions whose
            window does not fit into the data are NaN.

        Raises:
            ValueError: If *window* is smaller than 1.
        """
        if window < 1:
            raise ValueError('window must be at least 1')
        data = np.asarray(values, dtype=float)
        half = window // 2
        span = 2 * half + 1
        averages = np.full(len(data), np.nan)
        # An empty range (window longer than the data) leaves everything NaN.
        for center in range(half, len(data) - half):
            averages[center] = data[center - half:center + half + 1].sum() / span
        return averages

    @staticmethod
    def _exponential_average(values, alpha: float):
        """Return the exponentially smoothed series of *values* as a list.

        The first element is copied unchanged; every later element is
        ``alpha * value + (1 - alpha) * previous_smoothed_value``.
        """
        smoothed = []
        for value in values:
            if smoothed:
                value = alpha * value + (1 - alpha) * smoothed[-1]
            smoothed.append(value)
        return smoothed

    @staticmethod
    def _hyndman_acf(values, lags: int):
        """Return autocorrelation coefficients for the lags ``0 .. lags - 2``.

        Each coefficient is the sum over the overlapping positions of
        ``(y[t] - mean) * (y[t - k] - mean)`` divided by the sum of all
        squared deviations from the mean, so the coefficient at lag 0 is 1.

        Returns:
            A list of floats. All entries are NaN when the data is empty or
            constant (the denominator would be zero).
        """
        data = np.asarray(values, dtype=float)
        count = max(lags - 1, 0)
        size = len(data)
        if size == 0:
            return [math.nan] * count
        deviations = data - data.mean()
        total = float(np.dot(deviations, deviations))
        if total == 0:
            return [math.nan] * count
        coefficients = []
        for tau in range(count):
            # Number of (y[t], y[t - tau]) pairs; zero once tau >= size.
            overlap = max(size - tau, 0)
            covariance = np.dot(deviations[size - overlap:], deviations[:overlap])
            coefficients.append(float(covariance) / total)
        return coefficients

    @staticmethod
    def _naive_forecast_errors(values):
        """Return ``(ME, MAE, MSE)`` of the naive one-step forecast.

        The naive forecast for a position is the previous value, so there
        are ``len(values) - 1`` forecast errors and the sums are divided by
        that count. With fewer than two values all three results are NaN.
        """
        data = np.asarray(values, dtype=float)
        if len(data) < 2:
            return math.nan, math.nan, math.nan
        errors = np.diff(data)
        return (
            float(errors.mean()),
            float(np.abs(errors).mean()),
            float((errors * errors).mean()),
        )

    @staticmethod
    def _show_time_plot(title: str):
        """Label the current figure as a passengers-over-time plot and show it."""
        plt.title(title)
        plt.xlabel('Time')
        plt.ylabel('Passengers')
        plt.legend()
        plt.show()

    # ------------------------------------------------------------------
    # Plots
    # ------------------------------------------------------------------
    def plot_initial(self):
        """Plot the raw passenger numbers over time."""
        plt.plot(self.time, self.passengers, label='Initial')
        self._show_time_plot('Passengers per Month')

    def plot_moving_average_without_pandas(self, windows=None):
        """Plot hand-computed centred moving averages next to the raw data.

        Args:
            windows: Iterable of window lengths; defaults to ``[3]``.
        """
        if windows is None:
            windows = [3]
        for window in windows:
            averages = self._centered_moving_average(self.passengers, window)
            plt.plot(self.time, averages, label=f'Window={window}')
        plt.plot(self.time, self.passengers, label='Initial')
        self._show_time_plot('Initial plot vs moving averages')

    def plot_exponential_average_without_pandas(self, alpha=0.3):
        """Plot the hand-computed exponential average next to the raw data.

        Args:
            alpha: Weight of the newest observation in each smoothing step.
        """
        averages = self._exponential_average(self.passengers, alpha)
        plt.plot(self.time, self.passengers, label='Initial')
        plt.plot(self.time, averages, label=f'Alpha={alpha}')
        self._show_time_plot('Initial plot vs exponential averages')

    def plot_moving_averages_with_pandas(self, windows=None):
        """Plot trailing rolling means computed by pandas next to the raw data.

        Args:
            windows: Iterable of window lengths; defaults to ``[3]``.
        """
        if windows is None:
            windows = [3]
        for window in windows:
            # Roll over the passenger column only; the frame also holds the
            # datetime 'Month' column, which has no meaningful mean here.
            rolling_mean = self.passengers.rolling(window=window).mean()
            plt.plot(self.time, rolling_mean, label=f'Window={window}')
        plt.plot(self.time, self.passengers, label='Initial')
        self._show_time_plot('Initial vs rolling mean pandas')

    def plot_auto_correlation_hyndman_without_pandas(self, lags=140):
        """Show a stem plot of the hand-computed autocorrelation function.

        Args:
            lags: Upper bound for the lags; coefficients are computed for
                the lags ``0 .. lags - 2``.
        """
        acf = self._hyndman_acf(self.passengers, lags)
        # NOTE(review): `use_line_collection` is presumably only accepted by
        # older Matplotlib releases - confirm against the installed version.
        plt.stem(acf, use_line_collection=True)
        plt.show()

    def plot_adjacent_values(self):
        """Show a lag plot (lag 1) of the passenger numbers."""
        lag_plot(self.passengers, lag=1, label='Passengers')
        plt.title('Compare adjacent values')
        plt.legend()
        plt.show()

    def plot_auto_correlation_with_pandas(self):
        """Print the lag-1 correlation matrix and show pandas' autocorrelation plot."""
        # `keys` labels the two concatenated columns; the shifted series
        # comes first, so it carries the 't-1' label.
        data_frame = pd.concat(
            [self.passengers.shift(1), self.passengers],
            axis=1,
            keys=['t-1', 't+1'],
        )
        print(f'Correlation: {data_frame.corr()}')
        autocorrelation_plot(self.passengers)
        plt.show()

    def plot_auto_correlation_with_stats_models(self, lags=140):
        """Show the autocorrelation plot produced by statsmodels' ``plot_acf``.

        Args:
            lags: Forwarded to ``plot_acf``.
        """
        plot_acf(self.passengers, lags=lags)
        plt.show()

    def plot_naive_forecast(self):
        """Print the error measures of the naive forecast and plot it.

        The naive forecast of a month is the value of the previous month;
        the first month therefore has no forecast (NaN).
        """
        forecast = self.passengers.shift(1)
        me, mae, mse = self._naive_forecast_errors(self.passengers)
        print(f"Mean Error: {me}")
        print(f"Mean Absolute Error: {mae}")
        print(f"Mean Square Error: {mse}")
        plt.plot(self.time, self.passengers, label='Initial data')
        plt.plot(self.time, forecast, label='Naive forecast')
        self._show_time_plot('Naive forecast')

    def plot_arima_model(self):
        """Fit an ARIMA(2, 1, 2) model and plot its fitted values.

        Besides the raw data, the fitted values and their cumulative sum
        are drawn.
        """
        results = ARIMA(self.passengers, order=(2, 1, 2)).fit(disp=-1)
        predictions = results.fittedvalues.cumsum()
        # [1:] ignores the value at index 0, because fittedvalues and
        # predictions are 1 indexed
        plt.plot(self.time, self.passengers, label='Initial data')
        plt.plot(self.time[1:], results.fittedvalues, label='estimated differences')
        plt.plot(self.time[1:], predictions, label='accumulated sum')
        self._show_time_plot('ARIMA model output')

    def plot_arima_forecast(self, start=1, end=264):
        """Fit an ARIMA(2, 1, 2) model and show its ``plot_predict`` figure.

        Args:
            start: First position handed to ``plot_predict``.
            end: Last position handed to ``plot_predict``.
        """
        results = ARIMA(self.passengers, order=(2, 1, 2)).fit(disp=-1)
        results.plot_predict(start, end)
        plt.show()
if __name__ == '__main__':
    # Load the Excel sheet once, then show every demo figure in order.
    airline = AirlinePassengers('../../res/airline-passengers.xls')
    smoothing_windows = [3, 7, 11, 27]

    airline.plot_initial()
    airline.plot_moving_average_without_pandas(smoothing_windows)
    airline.plot_exponential_average_without_pandas()
    airline.plot_moving_averages_with_pandas(smoothing_windows)

    # The remaining plots take no arguments; run them one after another.
    for show_plot in (
        airline.plot_auto_correlation_hyndman_without_pandas,
        airline.plot_adjacent_values,
        airline.plot_auto_correlation_with_stats_models,
        airline.plot_naive_forecast,
        airline.plot_arima_model,
        airline.plot_arima_forecast,
    ):
        show_plot()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment