diff --git a/.gitignore b/.gitignore
index aa22a7f..e8cca63 100644
--- a/.gitignore
+++ b/.gitignore
@@ -111,9 +111,3 @@ venv.bak/
 
 # Tensorflow Model
 tf_pywatts_model/
-
-# Tensorboard
-pywatts/tensorboard
-
-# Figures
-figures/
diff --git a/README.md b/README.md
deleted file mode 100644
index 9af26d2..0000000
--- a/README.md
+++ /dev/null
@@ -1,27 +0,0 @@
-PyWatts - Predict Output of Solar Panels
-
-# Dependencies
-
-PyWatts is based on python3.6 and uses the following dependencies:
-
-* requests (2.19.1)
-* pypvwatts (2.1.0)
-* numpy (1.15.0)
-* peewee (3.5.4)
-* scikit-learn (0.19.2)
-* pandas (0.23.4)
-* tensorflow (1.9.0)
-* matplotlib (2.2.3)
-* scipy (1.1.0)
-
-We suggest using a python virtualenv.
-
-# Execute
-
-The script can be executed by issuing the following command:
-
-```bash
-$ python photovoltaic_gruppe4.py data.json
-```
-
-The output can be found in the same directory in `test_data_gruppe4.json`
diff --git a/photovoltaic_gruppe4.py b/photovoltaic_gruppe4.py
deleted file mode 100644
index 9183523..0000000
--- a/photovoltaic_gruppe4.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import os
-import sys
-
-import tensorflow as tf
-
-import pywatts.db
-from pywatts.routines import *
-
-# get rid of TF debug message
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
-if len(sys.argv) != 2:
-    print("Usage: python photovoltaic_gruppe4.py <json_file>")
-    exit(1)
-
-json_file = sys.argv[1]  # json file
-
-oneH, queries = input_queries(json_file)
-
-feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
-n = pywatts.neural.Net(feature_cols=feature_col)
-
-predictions = []
-total = len(queries)
-for idx, query in enumerate(queries):
-
-    percent = idx / total
-    sys.stdout.write("\r")
-    progress = ""
-    for i in range(20):
-        if i < int(20 * percent):
-            progress += "="
-        else:
-            progress += " "
-    sys.stdout.write("[ %s ] %.2f%%" % (progress, percent * 100))
-    sys.stdout.flush()
-
-    if oneH:
-        predictions.extend(predict(n, query).astype('Float64').tolist())
-    else:
-        predictions.append(predict24h(n, query))
-
-print(predictions, file=open("test_data_gruppe4.json", "w"))
-
-sys.stdout.write("\r")
-print("[ ==================== ] 100.00%")
diff --git a/pywatts/__init__.py b/pywatts/__init__.py
index f99e5ec..37f9f3a 100644
--- a/pywatts/__init__.py
+++ b/pywatts/__init__.py
@@ -1,5 +1,5 @@
 from pywatts import db
 from pywatts import fetchdata
 from pywatts import neural
-from pywatts import routines
+from pywatts import main
 from pywatts import kcross
\ No newline at end of file
diff --git a/pywatts/board.py b/pywatts/board.py
deleted file mode 100644
index dadb303..0000000
--- a/pywatts/board.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import tensorflow as tf
-import subprocess
-
-writer = tf.summary.FileWriter("tensorboard")
-checkpoint = tf.train.get_checkpoint_state('tf_pywatts_model_best')
-with tf.Session() as sess:
-    saver = tf.train.import_meta_graph(checkpoint.model_checkpoint_path + '.meta')
-    saver.restore(sess, checkpoint.model_checkpoint_path)
-writer.add_graph(sess.graph)
-
-subprocess.check_output(['tensorboard', '--logdir', 'tensorboard'])
\ No newline at end of file
diff --git a/pywatts/db.py b/pywatts/db.py
index b877125..37e1b66 100644
--- a/pywatts/db.py
+++ b/pywatts/db.py
@@ -6,7 +6,7 @@ from playhouse.sqlite_ext import SqliteExtDatabase
 import os.path
 
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-db_path = os.path.join(BASE_DIR, "pywatts.db")
+db_path = os.path.join(BASE_DIR, "../pywatts.db")
 
 db = SqliteExtDatabase(db_path)
 
@@ -34,14 +34,22 @@ class Result(Model):
 
 
 def rows_to_df(indices):
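+    # collect the hourly temperature, dc output and wind speed lists for the selected stations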
+    temps = []
     dcs = []
+    winds = []
 
     db.connect()
     for result in Result.select().where(Result.id << indices):
+        temps += result.temperature
         dcs += result.dc_output
+        winds += result.wind_speed
     db.close()
 
     return pd.DataFrame(
-        {'dc': dcs})
+        {'temp': temps,
+         'dc': dcs,
+         'wind': winds
+         })
 
 
diff --git a/pywatts/eval_training.py b/pywatts/eval_training.py
deleted file mode 100644
index 439b3bf..0000000
--- a/pywatts/eval_training.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import tensorflow as tf
-import pywatts.db
-from pywatts.routines import *
-from pywatts import kcross
-
-NUM_STATIONS_FROM_DB = 75
-K = 10
-NUM_EVAL_STATIONS = 40
-TRAIN = True
-PLOT = True
-TRAIN_STEPS = 10
-TOTAL_STEPS = 6
-NUM_QUERIES = 5
-PREDICT_QUERY = "query-sample_24hour.json"
-PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
-FIGURE_OUTPUT_DIR = "../figures/"
-
-
-df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
-X = df
-y = df['dc']
-
-
-# Define feature columns and initialize Regressor
-feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
-n = pywatts.neural.Net(feature_cols=feature_col)
-
-
-# Training data
-(X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
-
-
-if TRAIN:
-
-    train_eval = None
-
-    color_gradient_base = (0.5, 0, 0)
-    color_step_width = (0.5/TOTAL_STEPS, 0, 0)
-
-    for i in range(TOTAL_STEPS):
-        # Train the model with the steps given
-        train_eval = kcross.train(n, X_train, y_train, X_eval, y_eval, TRAIN_STEPS)
-
-        for q in range(NUM_QUERIES):
-
-            pred_query = input_query("../sample_data/" + PREDICT_QUERY, q)
-            pred_result = input_result("../sample_data/" + PREDICT_RESULT, q)
-
-            prediction = predict24h(n, pred_query)
-
-            pp.figure(q)
-
-            if i == 0:
-                pp.plot(pred_result, 'black')
-
-            pp.plot(prediction, color=color_gradient_base)
-            pp.savefig(FIGURE_OUTPUT_DIR+'{}.pdf'.format(q), orientation='landscape')
-
-        color_gradient_base = tuple([sum(x) for x in zip(color_gradient_base, color_step_width)])
-
-    for i in range(NUM_QUERIES):
-        pp.close(i)
-
-    if PLOT:
-        # Plot training success rate (with 'average loss')
-        loss = []
-        for e in train_eval:
-            loss.append(e['average_loss'])
-
-        pp.plot(loss)
-        # Needed for execution in PyCharm
-        pp.show()
-
-exit()
diff --git a/pywatts/kcross.py b/pywatts/kcross.py
index ed05c79..f4174b9 100644
--- a/pywatts/kcross.py
+++ b/pywatts/kcross.py
@@ -1,4 +1,6 @@
 import random
+import itertools
+from pywatts import db
 
 
 def split(data, k):
@@ -16,42 +18,44 @@ def split(data, k):
     data_list = data['dc'].tolist()
 
     # Each sample has 337 elements
-    samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 30)]
+    samples = [data_list[i:i+337] for i in range(0, len(data_list) - 337, 337)]
 
     # Randomly shuffle samples
     random.shuffle(samples)
 
     bucketsize = int(len(samples) / k)
 
     # K steps
     for i in range(k):
         eval_samples = []
         train_samples = []
         for j in range(k):
             if j == i:
                 eval_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
             else:
                 train_samples.extend(samples[j*bucketsize:(j+1)*bucketsize])
 
-        # Create new dictionaries in the eval lists
-        X_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
-        y_eval.append({'dc': [x for s in eval_samples for c, x in enumerate(s, 1) if c % 337 == 0]})
+        for s in eval_samples:
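+            # each sample holds 337 consecutive hours: the first 336 are features, the last is the label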
+            # Create new dictionaries in the eval lists
+            X_eval.append({'dc': s[:-1]})
+            y_eval.append({'dc': s[-1]})
 
-        X_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 != 0]})
-        y_train.append({'dc': [x for s in train_samples for c, x in enumerate(s, 1) if c % 337 == 0]})
+        for s in train_samples:
+            X_train.append({'dc': s[:-1]})
+            y_train.append({'dc': s[-1]})
 
     return X_train, y_train, X_eval, y_eval
 
 
 
-def train(nn, X_train, y_train, X_eval, y_eval, steps=100):
+def train(nn, X_train, y_train, X_eval, y_eval, steps=10):
     """Trains the Network nn using k-cross-validation"""
     evaluation = []
 
     for count, train_data in enumerate(X_train):
         for i in range(steps):
-            nn.train(train_data, y_train[count], batch_size=1000, steps=30) #batch_size=int(len(train_data['dc'])/336), steps=1)
-            evaluation.append(nn.evaluate(X_eval[count], y_eval[count]))
+            nn.train(train_data, y_train[count], batch_size=int(len(train_data['dc'])/336), steps=1)
+            evaluation.append(nn.evaluate(X_eval[count], y_eval[count], batch_size=int(len(X_eval[count]['dc'])/336)))
             print("Training %s: %s/%s" % (count, (i+1), steps))
-
     return evaluation
diff --git a/pywatts/routines.py b/pywatts/main.py
similarity index 58%
rename from pywatts/routines.py
rename to pywatts/main.py
index e500cd7..414f87d 100644
--- a/pywatts/routines.py
+++ b/pywatts/main.py
@@ -9,7 +9,7 @@ from random import randint
 
 def train_split(data, size):
     used_idxs = []
-    X_values = {'dc': []}
+    X_values = {'dc': [], 'temp': [], 'wind': []}
     y_values = []
     for i in range(size):
         rnd_idx = randint(0, data.size / data.shape[1] - 337)
@@ -20,6 +20,8 @@ def train_split(data, size):
             used_idxs.append(rnd_idx)
 
         X_values['dc'].extend(data['dc'][rnd_idx:rnd_idx + 336].tolist())
+        X_values['temp'].extend(data['temp'][rnd_idx:rnd_idx + 336].tolist())
+        X_values['wind'].extend(data['wind'][rnd_idx:rnd_idx + 336].tolist())
         y_values.append(data['dc'][rnd_idx + 337].tolist())
 
     return pandas.DataFrame.from_dict(X_values), pandas.DataFrame.from_dict({'dc': y_values})
@@ -29,25 +31,11 @@
 def input_query(json_str, idx=0):
     tmp_df = pandas.read_json(json_str)
 
     return pandas.DataFrame.from_dict(
-        {'dc': tmp_df['dc'][idx]}
+        {'dc': tmp_df['dc'][idx],
+         'temp': tmp_df['temp'][idx],
+         'wind': tmp_df['wind'][idx]}
     )
 
 
-def input_queries(json_str):
-    tmp_df = pandas.read_json(json_str)
-
-    oneH = False
-    try:
-        s = tmp_df['max_temp'][0]
-    except KeyError:
-        oneH = True
-
-    queries = []
-    for i in range(len(tmp_df)):
-        queries.append(pandas.DataFrame.from_dict(
-            {'dc': tmp_df['dc'][i]}
-        ))
-    return oneH, queries
-
-
 def input_result(json_str, idx=0):
     tmp_df = pandas.read_json(json_str)
@@ -66,37 +54,18 @@ def train(nn, X_train, y_train, X_val, y_val, steps=100):
     loss = []
-    steps = []
     for e in evaluation:
-        loss.append(e['loss'])
-        steps.append(e['global_step'])
+        loss.append(e['average_loss'])
 
-    pp.plot(steps, loss)
+    pp.plot(loss)
     # Needed for execution in PyCharm
     pp.show()
 
 
 def predict(nn, X_pred):
     pred = nn.predict1h(X_pred)
-    # Cap results to 0
-    predictions = np.array([max(p['predictions'], [0]) for p in pred])
-    return predictions
-
-
-def predict24h(nn, X_pred):
-    predictions = []
-
-    input = {'dc': X_pred['dc'].tolist()}
-
-    for i in range(24):
-        pred = nn.predict1h(pandas.DataFrame.from_dict(input))
-        # Cap prediction to 0
-        predictions.extend(list([max(p['predictions'][0], 0) for p in pred]))
-        # Remove first value and append predicted value
-        del input['dc'][0]
-        input['dc'].append(predictions[-1])
-        # print("Prediction for hour %d/%d" % (i+1, 24))
-
+    predictions = np.array([p['predictions'] for p in pred])
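+    # note: raw network outputs are returned; earlier versions capped negative predictions at zero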
     return predictions
 
 
@@ -108,9 +76,3 @@ def eval_prediction(prediction, result):
     print("The Median Absolute Error: %.2f volt dc" % median_absolute_error(
         result, prediction))
 
-def jsonify(predictions):
-    json_out = "["
-    for v in predictions:
-        json_out += "[" + str(v) + "],"
-    json_out = json_out[:-1] + "]"
-    return json_out
diff --git a/pywatts/neural.py b/pywatts/neural.py
index e6e42b3..bbe0b93 100644
--- a/pywatts/neural.py
+++ b/pywatts/neural.py
@@ -1,9 +1,12 @@
+import pandas
+import numpy as np
 import tensorflow as tf
 
 
 def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
     # Create dictionary for features in hour 0 ... 335
     features = {str(idx): [] for idx in range(336)}
+    #dc_values = X['dc'].tolist()
     dc_values = X['dc']
 
     # Iterate the empty dictionary always adding the idx-th element from the dc_values list
@@ -12,6 +15,7 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
 
     labels = None
     if y is not None:
+        #labels = y['dc'].values
         labels = y['dc']
 
     if labels is None:
@@ -19,22 +23,20 @@ def pywatts_input_fn(X, y=None, num_epochs=None, shuffle=True, batch_size=1):
         dataset = tf.data.Dataset.from_tensor_slices(dict(features))
     else:
         dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
 
-    if num_epochs is not None:
-        return dataset.batch(len(features['0']))
-    if shuffle:
-        return dataset.shuffle(len(features['0']*len(features)*4)).repeat().batch(batch_size)
-    else:
-        return dataset.batch(batch_size)
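+    # Dataset.shuffle returns a new dataset rather than shuffling in place, so reassign it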
+    dataset = dataset.shuffle(len(features['0']))
+
+    return dataset.batch(batch_size)
 
 
 class Net:
     __regressor = None
-    __feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc']]
+    __feature_cols = [tf.feature_column.numeric_column(col) for col in ['dc', 'temp', 'wind']]
 
     def __init__(self, feature_cols=__feature_cols):
         self.__regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
-                                                     hidden_units=[64, 128, 64],
+                                                     hidden_units=[75, 75],
                                                      model_dir='tf_pywatts_model')
 
     def train(self, training_data, training_results, batch_size, steps):
diff --git a/pywatts/test_kcross_train.py b/pywatts/test_kcross_train.py
index d67db36..807c0c4 100644
--- a/pywatts/test_kcross_train.py
+++ b/pywatts/test_kcross_train.py
@@ -1,14 +1,14 @@
+import peewee
 import tensorflow as tf
-
 import pywatts.db
 from pywatts import kcross
 
 NUM_STATIONS_FROM_DB = 75
-K = 10
+K = 4
 NUM_EVAL_STATIONS = 40
 TRAIN = True
 PLOT = True
-TRAIN_STEPS = 10
+TRAIN_STEPS = 4
 
 
 df = pywatts.db.rows_to_df(list(range(1, NUM_STATIONS_FROM_DB)))
@@ -25,7 +25,7 @@ n = pywatts.neural.Net(feature_cols=feature_col)
 
 (X_train, y_train, X_eval, y_eval) = kcross.split(df, K)
 
-#train_eval = {}
+train_eval = {}
 
 if TRAIN:
     # Train the model with the steps given
@@ -35,7 +35,7 @@ if TRAIN:
 
 
 if PLOT:
     # Plot training success rate (with 'average loss')
-    pywatts.routines.plot_training(train_eval)
+    pywatts.main.plot_training(train_eval)
 
 exit()
diff --git a/pywatts/test_predict.py b/pywatts/test_predict.py
index bc125be..7b76a5c 100644
--- a/pywatts/test_predict.py
+++ b/pywatts/test_predict.py
@@ -1,11 +1,11 @@
 import tensorflow as tf
 import pywatts.db
-from pywatts.routines import *
+from pywatts.main import *
 
 
 PREDICT_QUERY = "query-sample_1hour.json"
 PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
-QUERY_ID = 0
+QUERY_ID = 1
 
 
 pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
@@ -21,4 +21,4 @@ prediction = predict(n, pred_query)
 print(prediction)
 print(pred_result)
 
-pywatts.routines.eval_prediction(prediction, pred_result)
+pywatts.main.eval_prediction(prediction, pred_result)
diff --git a/pywatts/test_predict24.py b/pywatts/test_predict24.py
deleted file mode 100644
index c931f3a..0000000
--- a/pywatts/test_predict24.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import tensorflow as tf
-import pywatts.db
-from pywatts.routines import *
-import matplotlib.pyplot as pp
-
-
-PREDICT_QUERY = "query-sample_24hour.json"
-PREDICT_RESULT = PREDICT_QUERY.replace("query", "result")
-QUERY_ID = 0
-
-
-pred_query = input_query("../sample_data/" + PREDICT_QUERY, QUERY_ID)
-pred_result = input_result("../sample_data/" + PREDICT_RESULT, QUERY_ID)
-
-
-# Define feature columns and initialize Regressor
-feature_col = [tf.feature_column.numeric_column(str(idx)) for idx in range(336)]
-n = pywatts.neural.Net(feature_cols=feature_col)
-
-prediction = predict24h(n, pred_query)
-
-print(prediction)
-print(pred_result)
-
-pp.plot(pred_result, 'black')
-pp.plot(prediction, 'red')
-pp.show()
diff --git a/pywatts/test_train.py b/pywatts/test_train.py
index 09f814a..a378485 100644
--- a/pywatts/test_train.py
+++ b/pywatts/test_train.py
@@ -1,7 +1,7 @@
+import peewee
 import tensorflow as tf
-
 import pywatts.db
-from pywatts.routines import *
+from pywatts.main import *
 
 NUM_STATIONS_FROM_DB = 75
 NUM_TRAIN_STATIONS = 400
@@ -43,7 +43,7 @@ if TRAIN:
 
 
 if PLOT:
     # Plot training success rate (with 'average loss')
-    pywatts.routines.plot_training(train_eval)
+    pywatts.main.plot_training(train_eval)
 
 exit()