Collect Data¶
Loading Data¶
csv-Files¶
import pandas as pd
# Load the sample CSV straight from GitHub; the file uses ";" as its field
# separator instead of the default ",".
df = pd.read_csv(
    filepath_or_buffer="https://github.com/hslu-ige-laes/edar/raw/master/sampleData/flatElectricity.csv",
    sep=";",
)
# Show the first five rows as a quick sanity check of the parsed columns.
df.head(n=5)
| | time | FlatA_Ele | FlatB_Ele | FlatC_Ele | FlatD_Ele |
|---|---|---|---|---|---|
| 0 | 2018-11-30 23:45:00 | 5619.889 | 7246.254 | 5125.006 | 8387.030 |
| 1 | 2018-12-01 00:00:00 | 5619.904 | 7246.551 | 5125.039 | 8387.085 |
| 2 | 2018-12-01 00:15:00 | 5619.929 | 7246.753 | 5125.077 | 8387.137 |
| 3 | 2018-12-01 00:30:00 | 5619.952 | 7246.933 | 5125.117 | 8387.181 |
| 4 | 2018-12-01 00:45:00 | 5619.973 | 7247.005 | 5125.153 | 8387.222 |
html-Documents¶
# Agrometeo data page; per its query string it requests station 190,
# sensor "1:avg", at hourly scale, grouped by station.
download_string = "https://www.agrometeo.ch/de/meteorologie/data?stations=190&sensors=1%3Aavg&scale=hour&groupBy=station"
# pd.read_html needs an HTML parser library to be installed and returns a
# list containing one DataFrame per table found in the document.
readTable = pd.read_html(download_string, header=0, skiprows=1)
# The list elements are already DataFrames, so the first table is used
# directly instead of being wrapped in pd.DataFrame(...) again.
df = readTable[0]
df.head()
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-2-5d636652dfc1> in <module>
1 download_string = "https://www.agrometeo.ch/de/meteorologie/data?stations=190&sensors=1%3Aavg&scale=hour&groupBy=station"
----> 2 readTable = pd.read_html(download_string, header=0, skiprows=1)
3 df = pd.DataFrame(readTable[0])
4 df.head()
/opt/hostedtoolcache/Python/3.8.9/x64/lib/python3.8/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
297 )
298 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 299 return func(*args, **kwargs)
300
301 return wrapper
/opt/hostedtoolcache/Python/3.8.9/x64/lib/python3.8/site-packages/pandas/io/html.py in read_html(io, match, flavor, header, index_col, skiprows, attrs, parse_dates, thousands, encoding, decimal, converters, na_values, keep_default_na, displayed_only)
1083 io = stringify_path(io)
1084
-> 1085 return _parse(
1086 flavor=flavor,
1087 io=io,
/opt/hostedtoolcache/Python/3.8.9/x64/lib/python3.8/site-packages/pandas/io/html.py in _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs)
887 retained = None
888 for flav in flavor:
--> 889 parser = _parser_dispatch(flav)
890 p = parser(io, compiled_match, attrs, encoding, displayed_only)
891
/opt/hostedtoolcache/Python/3.8.9/x64/lib/python3.8/site-packages/pandas/io/html.py in _parser_dispatch(flavor)
844 else:
845 if not _HAS_LXML:
--> 846 raise ImportError("lxml not found, please install it")
847 return _valid_parsers[flavor]
848
ImportError: lxml not found, please install it
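The error above occurs because pd.read_html needs an HTML parsing library that is not installed. For the parser named in the error message: pip install lxml
for an HTML parser: pip install html5lib
to extract HTML data: pip install beautifulsoup4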
Application Programming Interfaces¶
Many applications offer interfaces (APIs) through which specific data can be requested in a structured format. JSON is the format most often used.
import requests, json
# meteoblue "basic-day" package request; the query string carries the
# coordinates, altitude, time zone and city of the requested location.
# NOTE(review): the API key and signature are hard-coded in the URL. Consider
# loading them from configuration instead of committing them with the notebook.
endpoint = "http://my.meteoblue.com/packages/basic-day?apikey=41f2dd49fb6a&lat=47.5584&lon=7.5733&asl=279&tz=Europe%2FZurich&city=Basel&sig=3e85133f41896cd51894ac05fd8a9d0b"
# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.get(endpoint, timeout=30)
# Raise on 4xx/5xx responses instead of trying to parse an error page as JSON.
response.raise_for_status()
# Response.json() decodes the body and parses it into Python dicts/lists.
data = response.json()
data
{'metadata': {'name': '',
'latitude': 47.56,
'longitude': 7.57,
'height': 279,
'timezone_abbrevation': 'CEST',
'utc_timeoffset': 2.0,
'modelrun_utc': '2021-04-19 12:00',
'modelrun_updatetime_utc': '2021-04-19 19:21'},
'units': {'time': 'YYYY-MM-DD hh:mm',
'predictability': 'percent',
'precipitation_probability': 'percent',
'pressure': 'hPa',
'relativehumidity': 'percent',
'co': 'ug/m3',
'temperature': 'C',
'winddirection': 'degree',
'precipitation': 'mm',
'windspeed': 'ms-1'},
'data_day': {'time': ['2021-04-20',
'2021-04-21',
'2021-04-22',
'2021-04-23',
'2021-04-24',
'2021-04-25',
'2021-04-26'],
'pictocode': [2, 2, 3, 1, 1, 2, 2],
'uvindex': [3, 5, 4, 6, 6, 5, 4],
'temperature_max': [14.81, 16.11, 13.19, 14.28, 18.05, 18.51, 18.8],
'temperature_min': [3.77, 3.64, 6.25, 2.05, 1.19, 4.57, 5.81],
'temperature_mean': [9.65, 10.45, 9.39, 8.17, 9.76, 11.62, 12.09],
'felttemperature_max': [12.71, 14.29, 9.45, 11.1, 16.18, 16.41, 15.08],
'felttemperature_min': [1.35, 0.74, 2.84, -1.07, -2.4, 1.94, 3.29],
'winddirection': [45, 135, 0, 45, 90, 90, 45],
'precipitation_probability': [5, 28, 18, 2, 7, 9, 31],
'rainspot': ['0000000000000000000000000000000000000000000000000',
'2222221222222122211011110000911000019100000900000',
'0000010000000100000010000000000000000000000000000',
'0000000000000000000000000000000000000000000000000',
'0000000000000000000000000000000000000000000000000',
'0000000000000000000000000000000000000000000000000',
'0200000022900001002000000000000000000000000000000'],
'predictability_class': [4, 4, 4, 4, 4, 3, 2],
'predictability': [79, 65, 71, 71, 66, 56, 30],
'precipitation': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
'snowfraction': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
'sealevelpressure_max': [1019, 1016, 1021, 1024, 1025, 1025, 1022],
'sealevelpressure_min': [1014, 1014, 1016, 1020, 1020, 1019, 1010],
'sealevelpressure_mean': [1017, 1015, 1018, 1022, 1023, 1022, 1015],
'windspeed_max': [1.82, 2.32, 3.44, 2.89, 2.11, 2.52, 3.94],
'windspeed_mean': [1.08, 1.48, 2.48, 2.04, 1.66, 1.67, 2.42],
'windspeed_min': [0.53, 0.75, 1.44, 1.15, 1.25, 0.94, 0.97],
'relativehumidity_max': [90, 84, 86, 78, 73, 78, 78],
'relativehumidity_min': [45, 43, 46, 40, 37, 39, 33],
'relativehumidity_mean': [67, 64, 68, 57, 56, 58, 55],
'convective_precipitation': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
'precipitation_hours': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
'humiditygreater90_hours': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}}