diff --git a/.gitignore b/.gitignore
index e8cca63..aa22a7f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -111,3 +111,9 @@ venv.bak/
 
 # Tensorflow Model
 tf_pywatts_model/
+
+# Tensorboard
+pywatts/tensorboard
+
+# Figures
+figures/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9af26d2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,27 @@
+# PyWatts - Predict Output of Solar Panels
+
+## Dependencies
+
+PyWatts is based on Python 3.6 and uses the following dependencies:
+
+* requests (2.19.1)
+* pypvwatts (2.1.0)
+* numpy (1.15.0)
+* peewee (3.5.4)
+* scikit-learn (0.19.2)
+* pandas (0.23.4)
+* tensorflow (1.9.0)
+* matplotlib (2.2.3)
+* scipy (1.1.0)
+
+We suggest using a Python virtualenv.
+
+## Execute
+
+The script can be executed by issuing the following command:
+
+```bash
+$ python photovoltaic_gruppe4.py data.json
+```
+
+The output can be found in the same directory in `test_data_gruppe4.json`.
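The patch does not document the shape of `data.json`. Judging from `input_queries` in `pywatts/routines.py` further down, each record carries a 336-hour `dc` window, and the presence of a `max_temp` column appears to switch the script between one-hour and 24-hour mode. A minimal sketch that fabricates a compatible input file under those assumptions:

```python
# Sketch only: the data.json schema is inferred from input_queries() below,
# not documented in the patch. Each record holds a 336-hour 'dc' window.
import json
import random

queries = [{"dc": [random.uniform(0.0, 500.0) for _ in range(336)]}
           for _ in range(3)]

with open("data.json", "w") as f:
    json.dump(queries, f)
```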
diff --git a/photovoltaic_gruppe4.py b/photovoltaic_gruppe4.py
new file mode 100644
index 0000000..9183523
--- /dev/null
+++ b/photovoltaic_gruppe4.py
@@ -0,0 +1,46 @@
+import os
+import sys
+
+import tensorflow as tf
+
+import pywatts.db
+from pywatts.routines import *
+
+# get rid of TF debug messages
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+if len(sys.argv) != 2:
+    print("Usage: python photovoltaic_gruppe4.py <json-file>")
+    exit(1)
+
+json_file = sys.argv[1]  # path to the input JSON file
+
+oneH, queries = input_queries(json_file)
+
+feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
+n = pywatts.neural.Net(feature_cols=feature_col)
+
+predictions = []
+total = len(queries)
+for idx, query in enumerate(queries):
+    # Draw a simple 20-segment progress bar
+    percent = idx / total
+    sys.stdout.write("\r")
+    progress = ""
+    for i in range(20):
+        if i < int(20 * percent):
+            progress += "="
+        else:
+            progress += " "
+    sys.stdout.write("[ %s ] %.2f%%" % (progress, percent * 100))
+    sys.stdout.flush()
+
+    if oneH:
+        predictions.extend(predict(n, query).astype('float64').tolist())
+    else:
+        predictions.append(predict24h(n, query))
+
+print(predictions, file=open("test_data_gruppe4.json", "w"))
+
+sys.stdout.write("\r")
+print("[ ==================== ] 100.00%")
diff --git a/pywatts/__init__.py b/pywatts/__init__.py
index 37f9f3a..f99e5ec 100644
--- a/pywatts/__init__.py
+++ b/pywatts/__init__.py
@@ -1,5 +1,5 @@
 from pywatts import db
 from pywatts import fetchdata
 from pywatts import neural
-from pywatts import main
+from pywatts import routines
 from pywatts import kcross
\ No newline at end of file
diff --git a/pywatts/board.py b/pywatts/board.py
new file mode 100644
index 0000000..dadb303
--- /dev/null
+++ b/pywatts/board.py
@@ -0,0 +1,11 @@
+import tensorflow as tf
+import subprocess
+
+writer = tf.summary.FileWriter("tensorboard")
+checkpoint = tf.train.get_checkpoint_state('tf_pywatts_model_best')
+with tf.Session() as sess:
+    saver = tf.train.import_meta_graph(checkpoint.model_checkpoint_path + '.meta')
+    saver.restore(sess, checkpoint.model_checkpoint_path)
+writer.add_graph(sess.graph)
+
+subprocess.check_output(['tensorboard', '--logdir', 'tensorboard'])
\ No newline at end of file
diff --git a/pywatts/db.py b/pywatts/db.py
index 37e1b66..b877125 100644
--- a/pywatts/db.py
+++ b/pywatts/db.py
@@ -6,8 +6,7 @@ from playhouse.sqlite_ext import SqliteExtDatabase
 import os.path
 
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-db_path = os.path.join(BASE_DIR, "../pywatts.db")
-print(db_path)
+db_path = os.path.join(BASE_DIR, "pywatts.db")
 
 db = SqliteExtDatabase(db_path)
 
@@ -35,21 +34,14 @@ class Result(Model):
 
 
 def rows_to_df(indices):
-    temps = []
     dcs = []
-    winds = []
 
     db.connect()
     for result in Result.select().where(Result.id << indices):
-        temps += result.temperature
         dcs += result.dc_output
-        winds += result.wind_speed
     db.close()
 
     return pd.DataFrame(
-        {'temp': temps,
-         'dc': dcs,
-         'wind': winds
-         })
+        {'dc': dcs})
diff --git a/pywatts/eval_training.py b/pywatts/eval_training.py
new file mode 100644
index 0000000..439b3bf
--- /dev/null
+++ b/pywatts/eval_training.py
@@ -0,0 +1,74 @@
+import tensorflow as tf
+import pywatts.db
+from pywatts.routines import *
+from pywatts import kcross
+
+NUM_STATIONS_FROM_DB = 75
+K = 10
+NUM_EVAL_STATIONS = 40
+TRAIN = True
+PLOT = True
+TRAIN_STEPS = 10
+TOTAL_STEPS = 6
+NUM_QUERIES = 5
+PREDICT_QUERY = "query-sample_24hour.json"
+PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
+FIGURE_OUTPUT_DIR = "../figures/"
+
+
+df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
+X = df
+y = df['dc']
+
+
+# Define feature columns and initialize Regressor
+feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
+n = pywatts.neural.Net(feature_cols=feature_col)
+
+
+# Training data
+(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
+
+
+if TRAIN:
+
+    train_eval = None
+
+    color_gradient_base = (0.5, 0, 0)
+    color_step_width = (0.5/TOTAL_STEPS, 0, 0)
+
+    for i in range(TOTAL_STEPS):
+        # Train the model with the steps given
+        train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)
+
+        for q in range(NUM_QUERIES):
+
+            pred_query = input_query("../sample_data/" + PREDICT_QUERY, q)
+            pred_result = input_result("../sample_data/" + PREDICT_RESULT, q)
+
+            prediction = predict24h(n, pred_query)
+
+            pp.figure(q)
+
+            if i == 0:
+                pp.plot(pred_result, 'black')
+
+            pp.plot(prediction, color=color_gradient_base)
+            pp.savefig(FIGURE_OUTPUT_DIR + '{}.pdf'.format(q), orientation='landscape')
+
+        color_gradient_base = tuple([sum(x) for x in zip(color_gradient_base, color_step_width)])
+
+    for i in range(NUM_QUERIES):
+        pp.close(i)
+
+    if PLOT:
+        # Plot training success rate (with 'average loss')
+        loss = []
+        for e in train_eval:
+            loss.append(e['average_loss'])
+
+        pp.plot(loss)
+        # Needed for execution in PyCharm
+        pp.show()
+
+exit()
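The kcross.py rework below no longer emits one dict per sample; each fold's samples are flattened into a single `{'dc': [...]}` feature dict, with every 337th value peeled off as the label. A standalone toy illustration of that `c % 337` bucketing (made-up numbers, no project imports):

```python
# Toy illustration of the c % 337 bucketing used in kcross.split below:
# two fake 337-value samples; the first 336 values of each are features,
# the 337th is the label.
samples = [list(range(s, s + 337)) for s in (0, 1000)]

features = [x for s in samples for c, x in enumerate(s, 1) if c % 337 != 0]
labels = [x for s in samples for c, x in enumerate(s, 1) if c % 337 == 0]

assert len(features) == 2 * 336 and labels == [336, 1336]
```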
diff --git a/pywatts/kcross.py b/pywatts/kcross.py
index f4174b9..ed05c79 100644
--- a/pywatts/kcross.py
+++ b/pywatts/kcross.py
@@ -1,6 +1,4 @@
 import random
-import itertools
-from pywatts import db
 
 
 def split(data, k):
@@ -18,58 +16,42 @@ def split(data, k):
     data_list = data['dc'].tolist()
 
     # Each sample has 337 elements
-    samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 337)]
+    samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 30)]
 
     # Randomly shuffle samples
     random.shuffle(samples)
 
     bucketsize = int(len(samples) / k)
-    print(k)
-    print(len(data))
-    print(len(samples))
-    print(bucketsize)
-
     # K steps
     for i in range(k):
-        eval_dict = []
-        train_dict = []
         eval_samples = []
         train_samples = []
 
         for j in range(k):
             if j == i:
-                eval_samples.extend(samples[i*bucketsize:(i+1)*bucketsize])
+                eval_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
             else:
-                train_samples.extend(samples[i*bucketsize:(i+1)*bucketsize])
+                train_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
 
-        for s in eval_samples:
-            # Create new dictionaries in the eval lists
-            X_eval.append({'dc': s[:-1]})
-            y_eval.append({'dc': s[-1]})
+        # Create new dictionaries in the eval lists
+        X_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
+        y_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 == 0]})
 
-        for s in train_samples:
-            X_train.append({'dc': s[:-1]})
-            y_train.append({'dc': s[-1]})
-
-    print(len(X_train) / 12)
-    #print(X_train)
-    #print(y_train)
-    exit(0)
+        X_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
+        y_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 == 0]})
 
     return X_train, y_train, X_eval, y_eval
 
 
-def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
+def train(nn, X_train, y_train, X_eval, y_eval, steps=100):
     """Trains the Network nn using k-cross-validation"""
     evaluation = []
 
     for count, train_data in enumerate(X_train):
         for i in range(steps):
-            nn.train(train_data, y_train[count], batch_size=int(len(train_data['dc'])/336), steps=1)
-            print(X_eval[count])
-            print(len(X_eval[count]['dc']))
-            print(y_eval[count])
-            evaluation.append(nn.evaluate(X_eval[count], y_eval[count], batch_size=int(len(X_eval[count]['dc'])/336)))
+            nn.train(train_data, y_train[count], batch_size=1000, steps=30)  # previously: batch_size=int(len(train_data['dc'])/336), steps=1
+            evaluation.append(nn.evaluate(X_eval[count], y_eval[count]))
             print("Training %s: %s/%s" % (count, (i+1), steps))
 
+    return evaluation
diff --git a/pywatts/neural.py b/pywatts/neural.py
index bbe0b93..e6e42b3 100644
--- a/pywatts/neural.py
+++ b/pywatts/neural.py
@@ -1,12 +1,9 @@
-import pandas
-import numpy as np
 import tensorflow as tf
 
 
 def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
     # Create dictionary for features in hour 0 ... 335
     features = {str(idx): [] for idx in range(336)}
-    #dc_values = X['dc'].tolist()
     dc_values = X['dc']
 
     # Iterate the empty dictionary always adding the idx-th element from the dc_values list
@@ -15,7 +12,6 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
 
     labels = None
     if y is not None:
-        #labels = y['dc'].values
         labels = y['dc']
 
     if labels is None:
@@ -23,19 +19,22 @@
     else:
         dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
 
-    if shuffle:
-        dataset.shuffle(len(features['0']))
+    if num_epochs is not None:
+        return dataset.batch(len(features['0']))
 
-    return dataset.batch(batch_size)
+    if shuffle:
+        return dataset.shuffle(len(features['0']) * len(features) * 4).repeat().batch(batch_size)
+    else:
+        return dataset.batch(batch_size)
 
 
 class Net:
     __regressor = None
-    __feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc', 'temp', 'wind']]
+    __feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc']]
 
     def __init__(self, feature_cols=__feature_cols):
         self.__regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
-                                                     hidden_units=[75, 75],
+                                                     hidden_units=[64, 128, 64],
                                                      model_dir='tf_pywatts_model')
 
     def train(self, training_data, training_results, batch_size, steps):
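In neural.py above, `pywatts_input_fn` spreads the flat `dc` window over 336 numeric feature columns named '0' through '335', one value per hour. The exact loop body is cut from the hunk context, so the following is an inference, a dummy-data sketch of what the reshaping amounts to (no TensorFlow required):

```python
# Sketch of the column-spreading in pywatts_input_fn: sample k contributes
# its idx-th hour to feature column str(idx). Inferred behavior, dummy data.
dc_values = list(range(672))  # two concatenated 336-hour windows

features = {str(idx): [] for idx in range(336)}
for idx in range(336):
    features[str(idx)] = dc_values[idx::336]  # every 336th value, offset idx

assert features['0'] == [0, 336] and features['335'] == [335, 671]
```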
diff --git a/pywatts/main.py b/pywatts/routines.py
similarity index 58%
rename from pywatts/main.py
rename to pywatts/routines.py
index 414f87d..e500cd7 100644
--- a/pywatts/main.py
+++ b/pywatts/routines.py
@@ -9,7 +9,7 @@ from random import randint
 
 def train_split(data, size):
     used_idxs = []
-    X_values = {'dc': [], 'temp': [], 'wind': []}
+    X_values = {'dc': []}
     y_values = []
     for i in range(size):
         rnd_idx = randint(0, data.size / data.shape[1] - 337)
@@ -20,8 +20,6 @@ def train_split(data, size):
         used_idxs.append(rnd_idx)
 
         X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist())
-        X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336].tolist())
-        X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336].tolist())
         y_values.append(data['dc'][rnd_idx + 337].tolist())
 
     return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values})
@@ -31,11 +29,25 @@ def input_query(json_str, idx=0):
     tmp_df = pandas.read_json(json_str)
 
     return pandas.DataFrame.from_dict(
-        {'dc': tmp_df['dc'][idx],
-         'temp': tmp_df['temp'][idx],
-         'wind': tmp_df['wind'][idx]}
+        {'dc': tmp_df['dc'][idx]}
     )
 
 
+def input_queries(json_str):
+    tmp_df = pandas.read_json(json_str)
+
+    oneH = False
+    try:
+        tmp_df['max_temp']
+    except KeyError:
+        oneH = True
+
+    queries = []
+    for i in range(len(tmp_df)):
+        queries.append(pandas.DataFrame.from_dict(
+            {'dc': tmp_df['dc'][i]}
+        ))
+    return oneH, queries
+
+
 def input_result(json_str, idx=0):
     tmp_df = pandas.read_json(json_str)
 
@@ -54,17 +66,37 @@ def train(nn, X_train, y_train, X_val, y_val, steps=100):
 
 def plot_training(evaluation):
     loss = []
+    steps = []
     for e in evaluation:
-        loss.append(e['average_loss'])
+        loss.append(e['loss'])
+        steps.append(e['global_step'])
 
-    pp.plot(loss)
+    pp.plot(steps, loss)
     # Needed for execution in PyCharm
     pp.show()
 
 
 def predict(nn, X_pred):
     pred = nn.predict1h(X_pred)
-    predictions = np.array([p['predictions'] for p in pred])
+    # Cap results to 0
+    predictions = np.array([np.maximum(p['predictions'], 0) for p in pred])
+    return predictions
+
+
+def predict24h(nn, X_pred):
+    predictions = []
+
+    window = {'dc': X_pred['dc'].tolist()}
+
+    for i in range(24):
+        pred = nn.predict1h(pandas.DataFrame.from_dict(window))
+        # Cap prediction to 0
+        predictions.extend([max(p['predictions'][0], 0) for p in pred])
+        # Remove the oldest value and append the predicted one
+        del window['dc'][0]
+        window['dc'].append(predictions[-1])
+        # print("Prediction for hour %d/%d" % (i+1, 24))
+
     return predictions
 
 
@@ -76,3 +108,9 @@ def eval_prediction(prediction, result):
     print("The Median Absolute Error: %.2f volt dc" % median_absolute_error(
         result, prediction))
 
+def jsonify(predictions):
+    json_out = "["
+    for v in predictions:
+        json_out += "[" + str(v) + "],"
+    json_out = json_out[:-1] + "]"
+    return json_out
diff --git a/pywatts/test_kcross_train.py b/pywatts/test_kcross_train.py
index 807c0c4..d67db36 100644
--- a/pywatts/test_kcross_train.py
+++ b/pywatts/test_kcross_train.py
@@ -1,14 +1,14 @@
-import peewee
 import tensorflow as tf
+
 import pywatts.db
 from pywatts import kcross
 
 NUM_STATIONS_FROM_DB = 75
-K = 4
+K = 10
 NUM_EVAL_STATIONS = 40
 TRAIN = True
 PLOT = True
-TRAIN_STEPS = 4
+TRAIN_STEPS = 10
 
 
 df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
@@ -25,7 +25,7 @@ n = pywatts.neural.Net(feature_cols=feature_col)
 (X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
 
 
-train_eval = {}
+# train_eval = {}
 
 if TRAIN:
     # Train the model with the steps given
@@ -35,7 +35,7 @@ if TRAIN:
 
 if PLOT:
     # Plot training success rate (with 'average loss')
-    pywatts.main.plot_training(train_eval)
+    pywatts.routines.plot_training(train_eval)
 
 exit()
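One note on the new `jsonify` helper in routines.py above: it assembles the output JSON string by hand. For reference, an equivalent built on the standard library (a sketch, not what the patch ships; identical output up to whitespace):

```python
import json

def jsonify(predictions):
    # Wrap each value in its own single-element list, as the hand-rolled
    # version does, then let the json module handle serialization.
    return json.dumps([[v] for v in predictions])

print(jsonify([1.0, 2.5]))  # [[1.0], [2.5]]
```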
diff --git a/pywatts/test_predict.py b/pywatts/test_predict.py
index 7b76a5c..bc125be 100644
--- a/pywatts/test_predict.py
+++ b/pywatts/test_predict.py
@@ -1,11 +1,11 @@
 import tensorflow as tf
 import pywatts.db
-from pywatts.main import *
+from pywatts.routines import *
 
 PREDICT_QUERY = "query-sample_1hour.json"
 PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
-QUERY_ID = 1
+QUERY_ID = 0
 
 
 pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
@@ -21,4 +21,4 @@
 prediction = predict(n, pred_query)
 print(prediction)
 print(pred_result)
-pywatts.main.eval_prediction(prediction, pred_result)
+pywatts.routines.eval_prediction(prediction, pred_result)
diff --git a/pywatts/test_predict24.py b/pywatts/test_predict24.py
new file mode 100644
index 0000000..c931f3a
--- /dev/null
+++ b/pywatts/test_predict24.py
@@ -0,0 +1,27 @@
+import tensorflow as tf
+import pywatts.db
+from pywatts.routines import *
+import matplotlib.pyplot as pp
+
+
+PREDICT_QUERY = "query-sample_24hour.json"
+PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
+QUERY_ID = 0
+
+
+pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
+pred_result = input_result("../sample_data/" + PREDICT_RESULT, QUERY_ID)
+
+
+# Define feature columns and initialize Regressor
+feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
+n = pywatts.neural.Net(feature_cols=feature_col)
+
+prediction = predict24h(n, pred_query)
+
+print(prediction)
+print(pred_result)
+
+pp.plot(pred_result, 'black')
+pp.plot(prediction, 'red')
+pp.show()
diff --git a/pywatts/test_train.py b/pywatts/test_train.py
index a378485..09f814a 100644
--- a/pywatts/test_train.py
+++ b/pywatts/test_train.py
@@ -1,7 +1,7 @@
-import peewee
 import tensorflow as tf
+
 import pywatts.db
-from pywatts.main import *
+from pywatts.routines import *
 
 NUM_STATIONS_FROM_DB = 75
 NUM_TRAIN_STATIONS = 400
@@ -43,7 +43,7 @@ if TRAIN:
 
 if PLOT:
     # Plot training success rate (with 'average loss')
-    pywatts.main.plot_training(train_eval)
+    pywatts.routines.plot_training(train_eval)
 
 exit()
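For reference, `predict24h` in routines.py implements a plain autoregressive rollout: predict one hour, drop the oldest hour from the 336-value window, append the prediction, and repeat 24 times. A minimal standalone sketch of that loop (the stub model is hypothetical; only the windowing mirrors the patch):

```python
# Stub model: stands in for Net.predict1h, which yields dicts carrying a
# 'predictions' array. Only the rolling-window loop mirrors routines.predict24h.
def stub_predict1h(window):
    return [{'predictions': [sum(window) / len(window)]}]

def predict24h_sketch(window):
    window = list(window)
    predictions = []
    for _ in range(24):
        pred = stub_predict1h(window)
        predictions.append(max(pred[0]['predictions'][0], 0))  # cap at 0, as in the patch
        del window[0]                    # drop the oldest hour
        window.append(predictions[-1])   # feed the prediction back in
    return predictions

print(predict24h_sketch([100.0] * 336))
```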