From cf14a8fe3a3968cbc885addcad07a067c2be64af Mon Sep 17 00:00:00 2001 From: jws52 <jws52@cam.ac.uk> Date: Fri, 19 May 2023 14:13:26 +0100 Subject: [PATCH] refactor: Scraper uses http get immediately An error once occurred in which the HTTP response content appeared to be lost before it could be used. Fetching the response and writing it to disk in a single step may mitigate the risk of it happening again. --- coordinator/ProcessorScraper.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/coordinator/ProcessorScraper.py b/coordinator/ProcessorScraper.py index 1a36b3a..cee2796 100644 --- a/coordinator/ProcessorScraper.py +++ b/coordinator/ProcessorScraper.py @@ -50,12 +50,10 @@ def get_news_reports_from_url(job_dir: str, url = URL_DEFAULT) -> None: assert os.path.exists(job_dir) - r = requests.get(url) - - # write a local copy of the zip file + # Get the zip file from the url and immediately write a local copy fn_zip = f"{job_dir}/data.zip" with open(fn_zip,'wb') as zipfile: - zipfile.write(r.content) + zipfile.write(requests.get(url).content) # unzip it dir_unzip = f"{job_dir}/data/" -- GitLab