Collect Data

Loading Data

csv-Files

import pandas as pd

# Read the semicolon-delimited sample file straight from its GitHub URL.
csv_url = "https://github.com/hslu-ige-laes/edar/raw/master/sampleData/flatElectricity.csv"
df = pd.read_csv(csv_url, sep=";")
# Preview the first rows to confirm the columns were split correctly.
df.head()
time FlatA_Ele FlatB_Ele FlatC_Ele FlatD_Ele
0 2018-11-30 23:45:00 5619.889 7246.254 5125.006 8387.030
1 2018-12-01 00:00:00 5619.904 7246.551 5125.039 8387.085
2 2018-12-01 00:15:00 5619.929 7246.753 5125.077 8387.137
3 2018-12-01 00:30:00 5619.952 7246.933 5125.117 8387.181
4 2018-12-01 00:45:00 5619.973 7247.005 5125.153 8387.222

html-Documents

    # pd.read_html needs an HTML parser backend (lxml by default) and returns
    # a list of DataFrames, one per table found on the page.
    download_string = "https://www.agrometeo.ch/de/meteorologie/data?stations=190&sensors=1%3Aavg&scale=hour&groupBy=station"
    readTable = pd.read_html(download_string, header=0, skiprows=1)
    # The list entries are already DataFrames, so the first table is used as-is.
    df = readTable[0]
    df.head()
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-2-5d636652dfc1> in <module>
      1 download_string = "https://www.agrometeo.ch/de/meteorologie/data?stations=190&sensors=1%3Aavg&scale=hour&groupBy=station"
----> 2 readTable = pd.read_html(download_string, header=0, skiprows=1)
      3 df = pd.DataFrame(readTable[0])
      4 df.head()

/opt/hostedtoolcache/Python/3.8.9/x64/lib/python3.8/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    297                 )
    298                 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 299             return func(*args, **kwargs)
    300 
    301         return wrapper

/opt/hostedtoolcache/Python/3.8.9/x64/lib/python3.8/site-packages/pandas/io/html.py in read_html(io, match, flavor, header, index_col, skiprows, attrs, parse_dates, thousands, encoding, decimal, converters, na_values, keep_default_na, displayed_only)
   1083     io = stringify_path(io)
   1084 
-> 1085     return _parse(
   1086         flavor=flavor,
   1087         io=io,

/opt/hostedtoolcache/Python/3.8.9/x64/lib/python3.8/site-packages/pandas/io/html.py in _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs)
    887     retained = None
    888     for flav in flavor:
--> 889         parser = _parser_dispatch(flav)
    890         p = parser(io, compiled_match, attrs, encoding, displayed_only)
    891 

/opt/hostedtoolcache/Python/3.8.9/x64/lib/python3.8/site-packages/pandas/io/html.py in _parser_dispatch(flavor)
    844     else:
    845         if not _HAS_LXML:
--> 846             raise ImportError("lxml not found, please install it")
    847     return _valid_parsers[flavor]
    848 

ImportError: lxml not found, please install it

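The error above means that the default HTML parser, lxml, is not installed (pip install lxml). For an alternative HTML parser: pip install html5lib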

To extract HTML data: pip install beautifulsoup4

Application Programming Interfaces

Many applications offer interfaces (APIs) through which specific data can be requested in a structured format. JSON is often used as that format.

import requests, json
# NOTE(review): the API key and signature are embedded in this URL and the
# request goes over plain http - consider loading credentials from
# configuration instead of committing them; confirm before publishing.
endpoint = "http://my.meteoblue.com/packages/basic-day?apikey=41f2dd49fb6a&lat=47.5584&lon=7.5733&asl=279&tz=Europe%2FZurich&city=Basel&sig=3e85133f41896cd51894ac05fd8a9d0b"
# Without a timeout, requests may wait forever on an unresponsive server.
response = requests.get(endpoint, timeout=30)
# Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
response.raise_for_status()
data = json.loads(response.text)
data
{'metadata': {'name': '',
  'latitude': 47.56,
  'longitude': 7.57,
  'height': 279,
  'timezone_abbrevation': 'CEST',
  'utc_timeoffset': 2.0,
  'modelrun_utc': '2021-04-19 12:00',
  'modelrun_updatetime_utc': '2021-04-19 19:21'},
 'units': {'time': 'YYYY-MM-DD hh:mm',
  'predictability': 'percent',
  'precipitation_probability': 'percent',
  'pressure': 'hPa',
  'relativehumidity': 'percent',
  'co': 'ug/m3',
  'temperature': 'C',
  'winddirection': 'degree',
  'precipitation': 'mm',
  'windspeed': 'ms-1'},
 'data_day': {'time': ['2021-04-20',
   '2021-04-21',
   '2021-04-22',
   '2021-04-23',
   '2021-04-24',
   '2021-04-25',
   '2021-04-26'],
  'pictocode': [2, 2, 3, 1, 1, 2, 2],
  'uvindex': [3, 5, 4, 6, 6, 5, 4],
  'temperature_max': [14.81, 16.11, 13.19, 14.28, 18.05, 18.51, 18.8],
  'temperature_min': [3.77, 3.64, 6.25, 2.05, 1.19, 4.57, 5.81],
  'temperature_mean': [9.65, 10.45, 9.39, 8.17, 9.76, 11.62, 12.09],
  'felttemperature_max': [12.71, 14.29, 9.45, 11.1, 16.18, 16.41, 15.08],
  'felttemperature_min': [1.35, 0.74, 2.84, -1.07, -2.4, 1.94, 3.29],
  'winddirection': [45, 135, 0, 45, 90, 90, 45],
  'precipitation_probability': [5, 28, 18, 2, 7, 9, 31],
  'rainspot': ['0000000000000000000000000000000000000000000000000',
   '2222221222222122211011110000911000019100000900000',
   '0000010000000100000010000000000000000000000000000',
   '0000000000000000000000000000000000000000000000000',
   '0000000000000000000000000000000000000000000000000',
   '0000000000000000000000000000000000000000000000000',
   '0200000022900001002000000000000000000000000000000'],
  'predictability_class': [4, 4, 4, 4, 4, 3, 2],
  'predictability': [79, 65, 71, 71, 66, 56, 30],
  'precipitation': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
  'snowfraction': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
  'sealevelpressure_max': [1019, 1016, 1021, 1024, 1025, 1025, 1022],
  'sealevelpressure_min': [1014, 1014, 1016, 1020, 1020, 1019, 1010],
  'sealevelpressure_mean': [1017, 1015, 1018, 1022, 1023, 1022, 1015],
  'windspeed_max': [1.82, 2.32, 3.44, 2.89, 2.11, 2.52, 3.94],
  'windspeed_mean': [1.08, 1.48, 2.48, 2.04, 1.66, 1.67, 2.42],
  'windspeed_min': [0.53, 0.75, 1.44, 1.15, 1.25, 0.94, 0.97],
  'relativehumidity_max': [90, 84, 86, 78, 73, 78, 78],
  'relativehumidity_min': [45, 43, 46, 40, 37, 39, 33],
  'relativehumidity_mean': [67, 64, 68, 57, 56, 58, 55],
  'convective_precipitation': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
  'precipitation_hours': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
  'humiditygreater90_hours': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}}

Saving Files

csv-Files

# Export the DataFrame as a semicolon-separated file: header row kept,
# index column left out.
df.to_csv(r'C:\temp\exportCsvFile.csv', sep=";", header=True, index=False)