Yeah … I’m mostly saving this for the regex search that uses a start and an end flag and spans newlines, because I don’t really need to know the dates on which each waste stream is collected in the Netherlands. It is cool, though, that they’ve got five different waste streams to collect.
"""Print waste collection dates scraped from the Waalre afvalkalender page.

The page is fetched once, the ``<ul id="ophaaldata" ...>`` list is cut out
with a regex search bounded by a start and an end flag (``re.DOTALL`` lets
the match span newlines), and each ``<li>`` entry is reduced to a
``(date, waste type)`` pair.
"""
import re

strBaseURL = 'https://afvalkalender.waalre.nl/adres/<some component of your address in the Netherlands>'
iTimeout = 600
strHeader = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}

# Start and end flags for waste stream collection schedule content
START = '<ul id="ophaaldata" class="line">'
END = '</ul>'

# Position of the date / waste type lines inside one <li> record once the
# record has been split on newlines.
DATE_LINE_INDEX = 3
WASTE_TYPE_LINE_INDEX = 4


def fetch_page(url, timeout, headers):
    """Download *url* and return its body decoded as UTF-8 text.

    Raises whatever ``requests`` raises for connection problems, and
    ``requests.HTTPError`` (via ``raise_for_status``) for 4xx/5xx replies.
    """
    # Imported here so the parsing helpers below stay usable without the
    # third-party dependency installed.
    import requests

    page = requests.get(url, timeout=timeout, headers=headers)
    page.raise_for_status()
    return page.content.decode("utf-8")


def extract_schedule_section(content, start=START, end=END):
    """Return the text between the first *start* flag and the next *end* flag.

    The flags are matched literally (they are escaped before being placed in
    the pattern) and the match may span newlines. Returns ``None`` when the
    flags are not found in *content*.
    """
    pattern = '{}(.*?){}'.format(re.escape(start), re.escape(end))
    match = re.search(pattern, content, re.DOTALL)
    if match is None:
        return None
    return match.group(1)


def parse_collections(content):
    """Return a list of ``(date, waste_type)`` string pairs found in *content*.

    Returns an empty list when the schedule section is missing. ``<li>``
    records with too few lines to hold both fields are skipped.
    """
    section = extract_schedule_section(content)
    if section is None:
        return []

    collections = []
    for record in re.findall('<li>(.*?)</li>', section, re.DOTALL):
        lines = record.split("\n")
        if len(lines) <= WASTE_TYPE_LINE_INDEX:
            # Not laid out like a schedule entry; nothing to read from it.
            continue
        date = lines[DATE_LINE_INDEX].strip()
        date = date.replace('<i class="date">', '').replace('</i>', '')
        waste_type = lines[WASTE_TYPE_LINE_INDEX].strip()
        waste_type = waste_type.replace('<i>', '').replace('</i>', '')
        collections.append((date, waste_type))
    return collections


def main():
    """Fetch the schedule page and print one line per collection date."""
    content = fetch_page(strBaseURL, iTimeout, strHeader)
    collections = parse_collections(content)
    if not collections:
        raise SystemExit("No collection dates found at {}".format(strBaseURL))
    for date, waste_type in collections:
        print("On {}, they collect {}".format(date, waste_type))


if __name__ == "__main__":
    main()