Compare commits

...
Sign in to create a new pull request.

26 commits

Author SHA1 Message Date
a8fd0844d2
Add readme 2018-09-13 14:50:50 +02:00
688b4025df
Fix script again 2018-09-13 14:13:41 +02:00
615428944a
Add fancy schmancy progress bar 2018-09-13 13:29:48 +02:00
ed68d78d98
Script output 2018-09-13 11:28:17 +02:00
70edcea2ca
Rubix changes 2018-09-12 17:52:05 +02:00
f668ceaf6a
Rename and fix script 2018-09-11 14:41:52 +02:00
reedts
f5735fa2f1 Minor fixes 2018-09-10 19:44:42 +02:00
65756a18a4
Merge 2018-09-09 17:27:13 +02:00
c6261134c9
Add prediction script 2018-09-09 17:25:43 +02:00
reedts
51d0e9cea8 Added new test configuration 2018-08-14 22:20:40 +02:00
reedts
dfddb8799e Merge branch 'master' of github.com:vanitasvitae/pywatts 2018-08-14 15:22:07 +02:00
reedts
e019f1bee7 Fixed shuffling 2018-08-14 15:21:39 +02:00
e97ba96dd4
Add figures folder to gitignore 2018-08-13 18:57:16 +02:00
reedts
173d5762bc Fix multiple graphs and plot while evaluating 2018-08-13 18:54:34 +02:00
525298f761
Fix reference to old main class 2018-08-13 17:17:37 +02:00
reedts
d4da4ca121 Add eval_training script 2018-08-13 16:35:03 +02:00
reedts
0eef892e0c Removed unnecessary line 2018-08-13 14:43:19 +02:00
reedts
68e9b9ddd0 Cap all prediction values to zero 2018-08-13 14:42:31 +02:00
reedts
aec38b2764 Merge branch 'master' of github.com:vanitasvitae/pywatts 2018-08-13 14:31:58 +02:00
reedts
841690f98b Capping to zero 2018-08-13 14:31:39 +02:00
ba0c7bc2ea
Add tensorboard to gitignore 2018-08-13 14:20:38 +02:00
reedts
0e228772dc Added 24 hour prediction 2018-08-13 14:19:39 +02:00
fd623c32de
Add tensorboard export script 2018-08-13 12:38:22 +02:00
reedts
0c07241104 Removed unnecessary lines 2018-08-13 10:02:50 +02:00
reedts
288be08699 Fixed (?) kcross 2018-08-09 11:54:33 +02:00
2dfe5ef1b6
Fix kcross validation 2018-08-07 17:54:05 +02:00
14 changed files with 272 additions and 70 deletions

6
.gitignore vendored
View file

@ -111,3 +111,9 @@ venv.bak/
# Tensorflow Model # Tensorflow Model
tf_pywatts_model/ tf_pywatts_model/
# Tensorboard
pywatts/tensorboard
# Figures
figures/

27
README.md Normal file
View file

@ -0,0 +1,27 @@
PyWatts - Predict Output of Solar Panels
# Dependencies
PyWatts is based on python3.6 and uses the following dependencies:
* requests (2.19.1)
* pypvwatts (2.1.0)
* numpy (1.15.0)
* peewee (3.5.4)
* scikit-learn (0.19.2)
* pandas (0.23.4)
* tensorflow (1.9.0)
* matplotlib (2.2.3)
* scipy (1.1.0)
We suggest using a python virtualenv.
# Execute
The script can be executed by issuing the following command:
```bash
$ python photovoltaic_gruppe4.py data.json
```
The output can be found in the same directory in `test_data_gruppe4.json`

46
photovoltaic_gruppe4.py Normal file
View file

@ -0,0 +1,46 @@
"""Entry point: read prediction queries from a JSON file and write the
predictions to ``test_data_gruppe4.json`` in the current directory.

Usage: python photovoltaic_gruppe4.py <file.json>
"""
import os

# Get rid of TF debug messages. This must be set BEFORE tensorflow is
# imported, otherwise the setting has no effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import sys

import tensorflow as tf

import pywatts.db
from pywatts.routines import *

if len(sys.argv) != 2:
    print("Usage: python photovoltaic_gruppe4.py <file.json>")
    exit(1)

json_file = sys.argv[1]  # path to the query json file

# oneH is True when the queries ask for single-hour predictions
# (detected by input_queries from the input's columns)
oneH, queries = input_queries(json_file)

# One numeric feature column per hour of the 336-hour input window
feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

predictions = []
total = len(queries)
for idx, query in enumerate(queries):
    # Render a fixed-width 20-character progress bar on a single line
    percent = idx / total
    progress = "=" * int(20 * percent)
    sys.stdout.write("\r[ %-20s ] %.2f%%" % (progress, percent * 100))
    sys.stdout.flush()
    if oneH:
        predictions.extend(predict(n, query).astype('Float64').tolist())
    else:
        predictions.append(predict24h(n, query))

# Close the output file deterministically instead of leaking the handle
with open("test_data_gruppe4.json", "w") as out_file:
    print(predictions, file=out_file)
sys.stdout.write("\r")
print("[ ==================== ] 100.00%")

View file

@ -1,5 +1,5 @@
from pywatts import db from pywatts import db
from pywatts import fetchdata from pywatts import fetchdata
from pywatts import neural from pywatts import neural
from pywatts import main from pywatts import routines
from pywatts import kcross from pywatts import kcross

11
pywatts/board.py Normal file
View file

@ -0,0 +1,11 @@
"""Export the best saved pywatts model graph for TensorBoard and
launch TensorBoard on the exported log directory."""
import subprocess

import tensorflow as tf

LOG_DIR = "tensorboard"

graph_writer = tf.summary.FileWriter(LOG_DIR)
checkpoint = tf.train.get_checkpoint_state('tf_pywatts_model_best')

with tf.Session() as sess:
    # Rebuild the graph from the checkpoint's meta file, restore the
    # weights, and dump the graph for TensorBoard to pick up.
    saver = tf.train.import_meta_graph(checkpoint.model_checkpoint_path + '.meta')
    saver.restore(sess, checkpoint.model_checkpoint_path)
    graph_writer.add_graph(sess.graph)

# Blocks until the tensorboard process exits.
subprocess.check_output(['tensorboard', '--logdir', 'tensorboard'])

View file

@ -6,8 +6,7 @@ from playhouse.sqlite_ext import SqliteExtDatabase
import os.path import os.path
BASE_DIR = os.path.dirname(os.path.abspath(__file__)) BASE_DIR = os.path.dirname(os.path.abspath(__file__))
db_path = os.path.join(BASE_DIR, "../pywatts.db") db_path = os.path.join(BASE_DIR, "pywatts.db")
print(db_path)
db = SqliteExtDatabase(db_path) db = SqliteExtDatabase(db_path)
@ -35,21 +34,14 @@ class Result(Model):
def rows_to_df(indices): def rows_to_df(indices):
temps = []
dcs = [] dcs = []
winds = []
db.connect() db.connect()
for result in Result.select().where(Result.id << indices): for result in Result.select().where(Result.id << indices):
temps += result.temperature
dcs += result.dc_output dcs += result.dc_output
winds += result.wind_speed
db.close() db.close()
return pd.DataFrame( return pd.DataFrame(
{'temp': temps, {'dc': dcs})
'dc': dcs,
'wind': winds
})

74
pywatts/eval_training.py Normal file
View file

@ -0,0 +1,74 @@
"""Train the pywatts net with k-cross validation and plot how the
24-hour predictions evolve over successive training rounds.

For each of NUM_QUERIES sample queries one figure is produced: the
reference result in black, plus one prediction curve per training
round drawn in an increasingly bright red.
"""
import tensorflow as tf

import pywatts.db
from pywatts.routines import *
from pywatts import kcross

NUM_STATIONS_FROM_DB = 75
K = 10
NUM_EVAL_STATIONS = 40
TRAIN = True
PLOT = True
TRAIN_STEPS = 10
TOTAL_STEPS = 6
NUM_QUERIES = 5
PREDICT_QUERY = "query-sample_24hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
FIGURE_OUTPUT_DIR = "../figures/"

df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
X = df
y = df['dc']

# Define feature columns and initialize Regressor
feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

# Training data
(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)

if TRAIN:
    train_eval = None
    # Start at dark red; each round shifts the curve colour brighter.
    color_gradient_base = (0.5, 0, 0)
    color_step_width = (0.5 / TOTAL_STEPS, 0, 0)
    for round_idx in range(TOTAL_STEPS):
        # Train the model with the steps given
        train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)
        for q in range(NUM_QUERIES):
            pred_query = input_query("../sample_data/" + PREDICT_QUERY, q)
            pred_result = input_result("../sample_data/" + PREDICT_RESULT, q)
            prediction = predict24h(n, pred_query)
            pp.figure(q)
            if round_idx == 0:
                # Draw the black reference curve only once per figure
                pp.plot(pred_result, 'black')
            pp.plot(prediction, color=color_gradient_base)
            pp.savefig(FIGURE_OUTPUT_DIR + '{}.pdf'.format(q), orientation='landscape')
        color_gradient_base = tuple(sum(c) for c in zip(color_gradient_base, color_step_width))
    for fig_idx in range(NUM_QUERIES):
        pp.close(fig_idx)

if PLOT:
    # Plot training success rate (with 'average loss')
    # NOTE(review): assumes TRAIN ran — train_eval is undefined otherwise;
    # confirm intended behaviour when TRAIN is False.
    loss = [e['average_loss'] for e in train_eval]
    pp.plot(loss)
    # Needed for execution in PyCharm
    pp.show()

exit()

View file

@ -1,6 +1,4 @@
import random import random
import itertools
from pywatts import db
def split(data, k): def split(data, k):
@ -18,58 +16,42 @@ def split(data, k):
data_list = data['dc'].tolist() data_list = data['dc'].tolist()
# Each sample has 337 elements # Each sample has 337 elements
samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 337)] samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 30)]
# Randomly shuffle samples # Randomly shuffle samples
random.shuffle(samples) random.shuffle(samples)
bucketsize = int(len(samples) / k) bucketsize = int(len(samples) / k)
print(k)
print(len(data))
print(len(samples))
print(bucketsize)
# K steps # K steps
for i in range(k): for i in range(k):
eval_dict = []
train_dict = []
eval_samples = [] eval_samples = []
train_samples = [] train_samples = []
for j in range(k): for j in range(k):
if j == i: if j == i:
eval_samples.extend(samples[i*bucketsize:(i+1)*bucketsize]) eval_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
else: else:
train_samples.extend(samples[i*bucketsize:(i+1)*bucketsize]) train_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
for s in eval_samples: # Create new dictionaries in the eval lists
# Create new dictionaries in the eval lists X_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
X_eval.append({'dc': s[:-1]}) y_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 == 0]})
y_eval.append({'dc': s[-1]})
for s in train_samples: X_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
X_train.append({'dc': s[:-1]}) y_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 == 0]})
y_train.append({'dc': s[-1]})
print(len(X_train) / 12)
#print(X_train)
#print(y_train)
exit(0)
return X_train, y_train, X_eval, y_eval return X_train, y_train, X_eval, y_eval
def train(nn, X_train, y_train, X_eval, y_eval, steps=10): def train(nn, X_train, y_train, X_eval, y_eval, steps=100):
"""Trains the Network nn using k-cross-validation""" """Trains the Network nn using k-cross-validation"""
evaluation = [] evaluation = []
for count, train_data in enumerate(X_train): for count, train_data in enumerate(X_train):
for i in range(steps): for i in range(steps):
nn.train(train_data, y_train[count], batch_size=int(len(train_data['dc'])/336), steps=1) nn.train(train_data, y_train[count], batch_size=1000, steps=30) #batch_size=int(len(train_data['dc'])/336), steps=1)
print(X_eval[count]) evaluation.append(nn.evaluate(X_eval[count], y_eval[count]))
print(len(X_eval[count]['dc']))
print(y_eval[count])
evaluation.append(nn.evaluate(X_eval[count], y_eval[count], batch_size=int(len(X_eval[count]['dc'])/336)))
print("Training %s: %s/%s" % (count, (i+1), steps)) print("Training %s: %s/%s" % (count, (i+1), steps))
return evaluation

View file

@ -1,12 +1,9 @@
import pandas
import numpy as np
import tensorflow as tf import tensorflow as tf
def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1): def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
# Create dictionary for features in hour 0 ... 335 # Create dictionary for features in hour 0 ... 335
features = {str(idx): [] for idx in range(336)} features = {str(idx): [] for idx in range(336)}
#dc_values = X['dc'].tolist()
dc_values = X['dc'] dc_values = X['dc']
# Iterate the empty dictionary always adding the idx-th element from the dc_values list # Iterate the empty dictionary always adding the idx-th element from the dc_values list
@ -15,7 +12,6 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
labels = None labels = None
if y is not None: if y is not None:
#labels = y['dc'].values
labels = y['dc'] labels = y['dc']
if labels is None: if labels is None:
@ -23,19 +19,22 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
else: else:
dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
if shuffle: if num_epochs is not None:
dataset.shuffle(len(features['0'])) return dataset.batch(len(features['0']))
return dataset.batch(batch_size) if shuffle:
return dataset.shuffle(len(features['0']*len(features)*4)).repeat().batch(batch_size)
else:
return dataset.batch(batch_size)
class Net: class Net:
__regressor = None __regressor = None
__feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc', 'temp', 'wind']] __feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc']]
def __init__(self, feature_cols=__feature_cols): def __init__(self, feature_cols=__feature_cols):
self.__regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols, self.__regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
hidden_units=[75, 75], hidden_units=[64, 128, 64],
model_dir='tf_pywatts_model') model_dir='tf_pywatts_model')
def train(self, training_data, training_results, batch_size, steps): def train(self, training_data, training_results, batch_size, steps):

View file

@ -9,7 +9,7 @@ from random import randint
def train_split(data, size): def train_split(data, size):
used_idxs = [] used_idxs = []
X_values = {'dc': [], 'temp': [], 'wind': []} X_values = {'dc': []}
y_values = [] y_values = []
for i in range(size): for i in range(size):
rnd_idx = randint(0, data.size / data.shape[1] - 337) rnd_idx = randint(0, data.size / data.shape[1] - 337)
@ -20,8 +20,6 @@ def train_split(data, size):
used_idxs.append(rnd_idx) used_idxs.append(rnd_idx)
X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist()) X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist())
X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336].tolist())
X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336].tolist())
y_values.append(data['dc'][rnd_idx + 337].tolist()) y_values.append(data['dc'][rnd_idx + 337].tolist())
return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values}) return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values})
@ -31,11 +29,25 @@ def input_query(json_str, idx=0):
tmp_df = pandas.read_json(json_str) tmp_df = pandas.read_json(json_str)
return pandas.DataFrame.from_dict( return pandas.DataFrame.from_dict(
{'dc': tmp_df['dc'][idx], {'dc': tmp_df['dc'][idx]}
'temp': tmp_df['temp'][idx],
'wind': tmp_df['wind'][idx]}
) )
def input_queries(json_str):
    """Read a batch of prediction queries from a JSON document.

    :param json_str: JSON source accepted by pandas.read_json
    :return: tuple (oneH, queries) where oneH is True when the input
             carries no 'max_temp' column (i.e. a 1-hour query set)
             and queries is a list of one single-column 'dc' DataFrame
             per input row
    """
    tmp_df = pandas.read_json(json_str)
    # A 1-hour query set has no 'max_temp' column; its absence switches
    # the caller into 1-hour prediction mode. (Original code bound the
    # probed value to an unused variable — dropped.)
    try:
        tmp_df['max_temp'][0]
        oneH = False
    except KeyError:
        oneH = True
    queries = [
        pandas.DataFrame.from_dict({'dc': tmp_df['dc'][i]})
        for i in range(len(tmp_df))
    ]
    return oneH, queries
def input_result(json_str, idx=0): def input_result(json_str, idx=0):
tmp_df = pandas.read_json(json_str) tmp_df = pandas.read_json(json_str)
@ -54,17 +66,37 @@ def train(nn, X_train, y_train, X_val, y_val, steps=100):
def plot_training(evaluation): def plot_training(evaluation):
loss = [] loss = []
steps = []
for e in evaluation: for e in evaluation:
loss.append(e['average_loss']) loss.append(e['loss'])
steps.append(e['global_step'])
pp.plot(loss) pp.plot(steps, loss)
# Needed for execution in PyCharm # Needed for execution in PyCharm
pp.show() pp.show()
def predict(nn, X_pred): def predict(nn, X_pred):
pred = nn.predict1h(X_pred) pred = nn.predict1h(X_pred)
predictions = np.array([p['predictions'] for p in pred]) # Cap results to 0
predictions = np.array([max(p['predictions'], [0]) for p in pred])
return predictions
def predict24h(nn, X_pred):
    """Predict the next 24 hours, one hour at a time.

    Feeds the network a sliding window: after each 1-hour prediction
    the oldest sample is dropped and the prediction appended, so later
    hours are predicted from earlier predictions.

    :param nn: trained network exposing predict1h()
    :param X_pred: DataFrame with a 'dc' column (the input window)
    :return: list of 24 hourly predictions, capped below at 0
    """
    predictions = []
    # Renamed from 'input' — the original shadowed the builtin.
    window = {'dc': X_pred['dc'].tolist()}
    for _ in range(24):
        pred = nn.predict1h(pandas.DataFrame.from_dict(window))
        # Cap prediction to 0 (panels cannot produce negative power)
        predictions.extend(max(p['predictions'][0], 0) for p in pred)
        # Slide the window: drop the oldest value, append the prediction
        del window['dc'][0]
        window['dc'].append(predictions[-1])
    return predictions
@ -76,3 +108,9 @@ def eval_prediction(prediction, result):
print("The Median Absolute Error: %.2f volt dc" % median_absolute_error( print("The Median Absolute Error: %.2f volt dc" % median_absolute_error(
result, prediction)) result, prediction))
def jsonify(predictions):
    """Serialize predictions as a JSON array of one-element arrays.

    e.g. [1, 2.5] -> "[[1],[2.5]]".

    Fixed: the original string-concatenation version returned the
    invalid fragment "]" for an empty input list; join-based build
    yields "[]" and avoids quadratic += concatenation.
    """
    return "[" + ",".join("[" + str(v) + "]" for v in predictions) + "]"

View file

@ -1,14 +1,14 @@
import peewee
import tensorflow as tf import tensorflow as tf
import pywatts.db import pywatts.db
from pywatts import kcross from pywatts import kcross
NUM_STATIONS_FROM_DB = 75 NUM_STATIONS_FROM_DB = 75
K = 4 K = 10
NUM_EVAL_STATIONS = 40 NUM_EVAL_STATIONS = 40
TRAIN = True TRAIN = True
PLOT = True PLOT = True
TRAIN_STEPS = 4 TRAIN_STEPS = 10
df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB))) df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
@ -25,7 +25,7 @@ n = pywatts.neural.Net(feature_cols=feature_col)
(X_train, y_train, X_eval, y_eval) = kcross.split(df, K) (X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
train_eval = {} #train_eval = {}
if TRAIN: if TRAIN:
# Train the model with the steps given # Train the model with the steps given
@ -35,7 +35,7 @@ if TRAIN:
if PLOT: if PLOT:
# Plot training success rate (with 'average loss') # Plot training success rate (with 'average loss')
pywatts.main.plot_training(train_eval) pywatts.routines.plot_training(train_eval)
exit() exit()

View file

@ -1,11 +1,11 @@
import tensorflow as tf import tensorflow as tf
import pywatts.db import pywatts.db
from pywatts.main import * from pywatts.routines import *
PREDICT_QUERY = "query-sample_1hour.json" PREDICT_QUERY = "query-sample_1hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result") PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
QUERY_ID = 1 QUERY_ID = 0
pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID) pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
@ -21,4 +21,4 @@ prediction = predict(n, pred_query)
print(prediction) print(prediction)
print(pred_result) print(pred_result)
pywatts.main.eval_prediction(prediction, pred_result) pywatts.routines.eval_prediction(prediction, pred_result)

27
pywatts/test_predict24.py Normal file
View file

@ -0,0 +1,27 @@
"""Manual check of the 24-hour prediction: run one sample query
through the saved model and plot prediction against the reference."""
import tensorflow as tf

import pywatts.db
from pywatts.routines import *

import matplotlib.pyplot as pp

PREDICT_QUERY = "query-sample_24hour.json"
PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
QUERY_ID = 0

pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
pred_result = input_result("../sample_data/" + PREDICT_RESULT, QUERY_ID)

# Define feature columns and initialize Regressor
feature_col = [tf.feature_column.numeric_column(str(hour)) for hour in range(336)]
n = pywatts.neural.Net(feature_cols=feature_col)

prediction = predict24h(n, pred_query)

print(prediction)
print(pred_result)

# Reference in black, model prediction in red.
pp.plot(pred_result, 'black')
pp.plot(prediction, 'red')
pp.show()

View file

@ -1,7 +1,7 @@
import peewee
import tensorflow as tf import tensorflow as tf
import pywatts.db import pywatts.db
from pywatts.main import * from pywatts.routines import *
NUM_STATIONS_FROM_DB = 75 NUM_STATIONS_FROM_DB = 75
NUM_TRAIN_STATIONS = 400 NUM_TRAIN_STATIONS = 400
@ -43,7 +43,7 @@ if TRAIN:
if PLOT: if PLOT:
# Plot training success rate (with 'average loss') # Plot training success rate (with 'average loss')
pywatts.main.plot_training(train_eval) pywatts.routines.plot_training(train_eval)
exit() exit()