{ "cells": [ { "cell_type": "markdown", "id": "f32100be", "metadata": {}, "source": [ "# Loading Custom Datasets\n", "\n", "This notebook will explain how to load custom datasets saved to CSV files, for either anomaly detection or forecasting." ] }, { "cell_type": "markdown", "id": "91095c9b", "metadata": {}, "source": [ "## Anomaly Detection Datasets\n", "\n", "Let's first look at a synthetic anomaly detection dataset. Note that this section just provides an alternative implementation of the dataset `ts_datasets.anomaly.Synthetic`. We begin by listing all the CSV files in the relevant directory. " ] }, { "cell_type": "code", "execution_count": 1, "id": "b4886d69", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../data/synthetic_anomaly/horizontal.csv\n", "../data/synthetic_anomaly/horizontal_dip_anomaly.csv\n", "../data/synthetic_anomaly/horizontal_level_anomaly.csv\n", "../data/synthetic_anomaly/horizontal_shock_anomaly.csv\n", "../data/synthetic_anomaly/horizontal_spike_anomaly.csv\n", "../data/synthetic_anomaly/horizontal_trend_anomaly.csv\n", "../data/synthetic_anomaly/seasonal.csv\n", "../data/synthetic_anomaly/seasonal_dip_anomaly.csv\n", "../data/synthetic_anomaly/seasonal_level_anomaly.csv\n", "../data/synthetic_anomaly/seasonal_shock_anomaly.csv\n", "../data/synthetic_anomaly/seasonal_spike_anomaly.csv\n", "../data/synthetic_anomaly/seasonal_trend_anomaly.csv\n", "../data/synthetic_anomaly/upward_downward.csv\n", "../data/synthetic_anomaly/upward_downward_dip_anomaly.csv\n", "../data/synthetic_anomaly/upward_downward_level_anomaly.csv\n", "../data/synthetic_anomaly/upward_downward_shock_anomaly.csv\n", "../data/synthetic_anomaly/upward_downward_spike_anomaly.csv\n", "../data/synthetic_anomaly/upward_downward_trend_anomaly.csv\n" ] } ], "source": [ "import glob\n", "import os\n", "anom_dir = os.path.join(\"..\", \"data\", \"synthetic_anomaly\")\n", "csvs = sorted(glob.glob(f\"{anom_dir}/*.csv\"))\n", "for csv in csvs:\n", " print(csv)" ] }, { "cell_type": "markdown", "id": "9d319673", "metadata": {}, "source": [ "Let's visualize what a couple of these CSVs look like." ] }, { "cell_type": "code", "execution_count": 2, "id": "3151334c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../data/synthetic_anomaly/horizontal.csv\n" ] }, { "data": { "text/html": [ "
\n", " | timestamp | \n", "horizontal | \n", "
---|---|---|
0 | \n", "0 | \n", "1.928031 | \n", "
1 | \n", "300 | \n", "-1.156620 | \n", "
2 | \n", "600 | \n", "-0.390650 | \n", "
3 | \n", "900 | \n", "0.400804 | \n", "
4 | \n", "1200 | \n", "-0.874490 | \n", "
... | \n", "... | \n", "... | \n", "
9995 | \n", "2998500 | \n", "0.362724 | \n", "
9996 | \n", "2998800 | \n", "2.657373 | \n", "
9997 | \n", "2999100 | \n", "1.472341 | \n", "
9998 | \n", "2999400 | \n", "1.033154 | \n", "
9999 | \n", "2999700 | \n", "2.950466 | \n", "
10000 rows × 2 columns
\n", "\n", " | timestamp | \n", "seasonal | \n", "anomaly | \n", "
---|---|---|---|
0 | \n", "0 | \n", "-0.577883 | \n", "0.0 | \n", "
1 | \n", "300 | \n", "1.059779 | \n", "0.0 | \n", "
2 | \n", "600 | \n", "1.137609 | \n", "0.0 | \n", "
3 | \n", "900 | \n", "0.743360 | \n", "0.0 | \n", "
4 | \n", "1200 | \n", "1.998400 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
9995 | \n", "2998500 | \n", "-5.388685 | \n", "0.0 | \n", "
9996 | \n", "2998800 | \n", "-5.017828 | \n", "0.0 | \n", "
9997 | \n", "2999100 | \n", "-4.196791 | \n", "0.0 | \n", "
9998 | \n", "2999400 | \n", "-4.234555 | \n", "0.0 | \n", "
9999 | \n", "2999700 | \n", "-3.111685 | \n", "0.0 | \n", "
10000 rows × 3 columns
\n", "\n", " | horizontal | \n", "
---|---|
timestamp | \n", "\n", " |
1970-01-01 00:00:00 | \n", "1.928031 | \n", "
1970-01-01 00:05:00 | \n", "-1.156620 | \n", "
1970-01-01 00:10:00 | \n", "-0.390650 | \n", "
1970-01-01 00:15:00 | \n", "0.400804 | \n", "
1970-01-01 00:20:00 | \n", "-0.874490 | \n", "
... | \n", "... | \n", "
1970-02-04 16:55:00 | \n", "0.362724 | \n", "
1970-02-04 17:00:00 | \n", "2.657373 | \n", "
1970-02-04 17:05:00 | \n", "1.472341 | \n", "
1970-02-04 17:10:00 | \n", "1.033154 | \n", "
1970-02-04 17:15:00 | \n", "2.950466 | \n", "
10000 rows × 1 columns
\n", "\n", " | anomaly | \n", "trainval | \n", "
---|---|---|
timestamp | \n", "\n", " | \n", " |
1970-01-01 00:00:00 | \n", "False | \n", "True | \n", "
1970-01-01 00:05:00 | \n", "False | \n", "True | \n", "
1970-01-01 00:10:00 | \n", "False | \n", "True | \n", "
1970-01-01 00:15:00 | \n", "False | \n", "True | \n", "
1970-01-01 00:20:00 | \n", "False | \n", "True | \n", "
... | \n", "... | \n", "... | \n", "
1970-02-04 16:55:00 | \n", "False | \n", "False | \n", "
1970-02-04 17:00:00 | \n", "False | \n", "False | \n", "
1970-02-04 17:05:00 | \n", "False | \n", "False | \n", "
1970-02-04 17:10:00 | \n", "False | \n", "False | \n", "
1970-02-04 17:15:00 | \n", "False | \n", "False | \n", "
10000 rows × 2 columns
\n", "\n", " | Store | \n", "Dept | \n", "Date | \n", "Weekly_Sales | \n", "Temperature | \n", "Fuel_Price | \n", "MarkDown1 | \n", "MarkDown2 | \n", "MarkDown3 | \n", "MarkDown4 | \n", "MarkDown5 | \n", "CPI | \n", "Unemployment | \n", "IsHoliday | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "1 | \n", "2010-02-05 | \n", "24924.50 | \n", "42.31 | \n", "2.572 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "211.096358 | \n", "8.106 | \n", "False | \n", "
1 | \n", "1 | \n", "1 | \n", "2010-02-12 | \n", "46039.49 | \n", "38.51 | \n", "2.548 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "211.242170 | \n", "8.106 | \n", "True | \n", "
2 | \n", "1 | \n", "1 | \n", "2010-02-19 | \n", "41595.55 | \n", "39.93 | \n", "2.514 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "211.289143 | \n", "8.106 | \n", "False | \n", "
3 | \n", "1 | \n", "1 | \n", "2010-02-26 | \n", "19403.54 | \n", "46.63 | \n", "2.561 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "211.319643 | \n", "8.106 | \n", "False | \n", "
4 | \n", "1 | \n", "1 | \n", "2010-03-05 | \n", "21827.90 | \n", "46.50 | \n", "2.625 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "211.350143 | \n", "8.106 | \n", "False | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2855 | \n", "2 | \n", "10 | \n", "2012-09-28 | \n", "37104.67 | \n", "79.45 | \n", "3.666 | \n", "7106.05 | \n", "1.91 | \n", "1.65 | \n", "1549.10 | \n", "3946.03 | \n", "222.616433 | \n", "6.565 | \n", "False | \n", "
2856 | \n", "2 | \n", "10 | \n", "2012-10-05 | \n", "36361.28 | \n", "70.27 | \n", "3.617 | \n", "6037.76 | \n", "NaN | \n", "10.04 | \n", "3027.37 | \n", "3853.40 | \n", "222.815930 | \n", "6.170 | \n", "False | \n", "
2857 | \n", "2 | \n", "10 | \n", "2012-10-12 | \n", "35332.34 | \n", "60.97 | \n", "3.601 | \n", "2145.50 | \n", "NaN | \n", "33.31 | \n", "586.83 | \n", "10421.01 | \n", "223.015426 | \n", "6.170 | \n", "False | \n", "
2858 | \n", "2 | \n", "10 | \n", "2012-10-19 | \n", "35721.09 | \n", "68.08 | \n", "3.594 | \n", "4461.89 | \n", "NaN | \n", "1.14 | \n", "1579.67 | \n", "2642.29 | \n", "223.059808 | \n", "6.170 | \n", "False | \n", "
2859 | \n", "2 | \n", "10 | \n", "2012-10-26 | \n", "34260.76 | \n", "69.79 | \n", "3.506 | \n", "6152.59 | \n", "129.77 | \n", "200.00 | \n", "272.29 | \n", "2924.15 | \n", "223.078337 | \n", "6.170 | \n", "False | \n", "
2860 rows × 14 columns
\n", "\n", " | Weekly_Sales | \n", "Temperature | \n", "Fuel_Price | \n", "MarkDown1 | \n", "MarkDown2 | \n", "MarkDown3 | \n", "MarkDown4 | \n", "MarkDown5 | \n", "CPI | \n", "Unemployment | \n", "IsHoliday | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
Date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2010-02-05 | \n", "69634.80 | \n", "40.19 | \n", "2.572 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "210.752605 | \n", "8.324 | \n", "False | \n", "
2010-02-12 | \n", "63393.29 | \n", "38.49 | \n", "2.548 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "210.897994 | \n", "8.324 | \n", "True | \n", "
2010-02-19 | \n", "66589.27 | \n", "39.69 | \n", "2.514 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "210.945160 | \n", "8.324 | \n", "False | \n", "
2010-02-26 | \n", "61875.48 | \n", "46.10 | \n", "2.561 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "210.975957 | \n", "8.324 | \n", "False | \n", "
2010-03-05 | \n", "67041.18 | \n", "47.17 | \n", "2.625 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "211.006754 | \n", "8.324 | \n", "False | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2012-09-28 | \n", "57424.00 | \n", "79.45 | \n", "3.666 | \n", "7106.05 | \n", "1.91 | \n", "1.65 | \n", "1549.10 | \n", "3946.03 | \n", "222.616433 | \n", "6.565 | \n", "False | \n", "
2012-10-05 | \n", "62955.51 | \n", "70.27 | \n", "3.617 | \n", "6037.76 | \n", "NaN | \n", "10.04 | \n", "3027.37 | \n", "3853.40 | \n", "222.815930 | \n", "6.170 | \n", "False | \n", "
2012-10-12 | \n", "63083.63 | \n", "60.97 | \n", "3.601 | \n", "2145.50 | \n", "NaN | \n", "33.31 | \n", "586.83 | \n", "10421.01 | \n", "223.015426 | \n", "6.170 | \n", "False | \n", "
2012-10-19 | \n", "60502.97 | \n", "68.08 | \n", "3.594 | \n", "4461.89 | \n", "NaN | \n", "1.14 | \n", "1579.67 | \n", "2642.29 | \n", "223.059808 | \n", "6.170 | \n", "False | \n", "
2012-10-26 | \n", "63992.36 | \n", "69.79 | \n", "3.506 | \n", "6152.59 | \n", "129.77 | \n", "200.00 | \n", "272.29 | \n", "2924.15 | \n", "223.078337 | \n", "6.170 | \n", "False | \n", "
143 rows × 11 columns
\n", "\n", " | trainval | \n", "Store | \n", "Dept | \n", "
---|---|---|---|
Date | \n", "\n", " | \n", " | \n", " |
2010-02-05 | \n", "True | \n", "2 | \n", "8 | \n", "
2010-02-12 | \n", "True | \n", "2 | \n", "8 | \n", "
2010-02-19 | \n", "True | \n", "2 | \n", "8 | \n", "
2010-02-26 | \n", "True | \n", "2 | \n", "8 | \n", "
2010-03-05 | \n", "True | \n", "2 | \n", "8 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
2012-09-28 | \n", "False | \n", "2 | \n", "8 | \n", "
2012-10-05 | \n", "False | \n", "2 | \n", "8 | \n", "
2012-10-12 | \n", "False | \n", "2 | \n", "8 | \n", "
2012-10-19 | \n", "False | \n", "2 | \n", "8 | \n", "
2012-10-26 | \n", "False | \n", "2 | \n", "8 | \n", "
143 rows × 3 columns
\n", "