diff --git a/coordinator/ProcessorScraper.py b/coordinator/ProcessorScraper.py
index cee27963b2fc62b71aeb6cb671547ac4493921a2..9f3aa70f781ecef0a1efb812656d5e35fdc888f8 100644
--- a/coordinator/ProcessorScraper.py
+++ b/coordinator/ProcessorScraper.py
@@ -22,422 +22,451 @@ import certifi
 from numpy import where
 from pandas import concat, DataFrame, read_csv, Series, set_option
 
+from Processor import Processor
 # gitlab projects
 # TODO: Package these projects so they are robust for importing
 
 from flagdir import jobStatus # created by jws52
 from ProcessorUtils import add_filters_to_sublogger
 
-logger = logging.getLogger('Processor.Scraper')
-add_filters_to_sublogger(logger)
 
-# date format conforms to format used in SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv
-# # ODK v1.11.2:
-# FMT = '%d-%b-%Y'
-# FMT_SHORT = '%d-%b-%Y'
-# ODK v1.18.0: 
-FMT = '%b %d, %Y %H:%m:%S %p'
-FMT_SHORT = '%b %d, %Y'
+class ProcessorScraper(Processor):
 
-# url location of latest news report search
-URL_DEFAULT = 'http://arrcc-viewer.herokuapp.com/assets/sample_data/data.zip' 
+    """ LIFECYCLE FUNCTIONS INHERITED FROM PROCESSOR.PY """
 
-def get_news_reports_from_url(job_dir: str, url = URL_DEFAULT) -> None:
-    '''Downloads the news report data available on the ARRCC media report 
-    dashboard, into the provided directory.
-    
-    Does not return anything'''
+    def process_pre_job(self, args):
+        return True
+
+    def process_in_job(self, jobPath, status, configjson, component) -> object:
+        return self.process_in_job_media_scraper(jobPath, status, configjson, component)
 
-    assert os.path.exists(job_dir)
+    def process_post_job(self, jobPath, configjson):
+        pass
 
-    # Get the zip file from the url and immediately write a local copy
-    fn_zip = f"{job_dir}/data.zip"
-    with open(fn_zip,'wb') as zipfile:
-        zipfile.write(requests.get(url).content)
+    def __init__(self) -> None:
+        super().__init__()
+        self.logger = logging.getLogger('Processor.Scraper')
+        add_filters_to_sublogger(self.logger)
 
-    # unzip it
-    dir_unzip = f"{job_dir}/data/"
-    Path(dir_unzip).mkdir(parents=True, exist_ok=False)
-    cmd_unzip = ['unzip',fn_zip,'-d',dir_unzip]
-    subprocess.run(cmd_unzip)
+    """ LIFECYCLE FUNCTIONS INHERITED FROM PROCESSOR.PY """
 
-    return
 
-def read_news_reports(job_dir: str) -> DataFrame:
-    '''Opens the news reports in the provided directory.
+    # date format conforms to format used in SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv
+    # # ODK v1.11.2:
+    # FMT = '%d-%b-%Y'
+    # FMT_SHORT = '%d-%b-%Y'
+    # ODK v1.18.0:
+    FMT = '%b %d, %Y %H:%m:%S %p'
+    FMT_SHORT = '%b %d, %Y'
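+    # NB: in strftime terms, '%m' in the time field is the month, not minutes ('%M');
+    # left unchanged on the assumption that existing SURVEYDATA_MANUAL files were
+    # written with this exact format.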
+
+    # url location of latest news report search
+    URL_DEFAULT = 'http://arrcc-viewer.herokuapp.com/assets/sample_data/data.zip'
+
+    def get_news_reports_from_url(self, job_dir: str, url = URL_DEFAULT) -> None:
+        '''Downloads the news report data available on the ARRCC media report
+        dashboard into the provided directory.
+
+        Does not return anything.'''
+
+        assert os.path.exists(job_dir)
+
+        # Get the zip file from the url and immediately write a local copy
+        fn_zip = f"{job_dir}/data.zip"
+        with open(fn_zip,'wb') as zipfile:
+            zipfile.write(requests.get(url).content)
+
+        # unzip it (relies on the external 'unzip' binary being available on PATH)
+        dir_unzip = f"{job_dir}/data/"
+        Path(dir_unzip).mkdir(parents=True, exist_ok=False)
+        cmd_unzip = ['unzip',fn_zip,'-d',dir_unzip]
+        subprocess.run(cmd_unzip)
+
+        return
+
+    def read_news_reports(self, job_dir: str) -> DataFrame:
+        '''Opens the news reports in the provided directory.
+
+        Returns a pandas dataframe.'''
+
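+        # Illustrative only: the dashboard export is assumed to provide at least the
+        # columns used downstream (see reformat_news_reports), with dates as dd-mm-yy, e.g.
+        #   ,Lon,Lat,Date,Type,Link,Country,State,District
+        #   0,85.3,27.7,05-03-21,Yellow rust,http://example.com/article,Nepal,Bagmati,Kathmandu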
+        fn = f"{job_dir}/data/NEWS REPORTS.csv"
+
+        dateparse = lambda x: datetime.datetime.strptime(x, '%d-%m-%y')
+
+        df = read_csv(
+                fn,
+                index_col=0,
+                header=0,
+                parse_dates=['Date'],
+                date_parser=dateparse)
+
+        return df
+
+    def estimate_rust(
+        self,
+        description: Series,
+        disease: str,
+        return_type: str) -> Series:
+        '''Flags mentions of the given disease in a pandas series of report
+        descriptions, returning the assumed incidence ('medium') or severity
+        ('30') where it is mentioned and 'na' otherwise.'''
+
+        # check for alternative naming
+        if disease in ['yellow','stripe']:
+
+            any_disease = description.str.contains('yellow') | description.str.contains('stripe')
+
+        else:
+            any_disease = description.str.contains(disease)
+
+        return_dict = {
+                'incidence':'medium',
+                'severity':'30'
+                }
+
+        prevalence = where(any_disease,return_dict[return_type],'na')
+
+        return prevalence
+
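+    # A minimal sketch of the estimate_rust() mapping (the returned values are the
+    # hard-coded assumptions above, not measurements; matching is a case-sensitive
+    # substring search):
+    #   self.estimate_rust(Series(['yellow rust spreading', 'leaf spot']), 'yellow', 'severity')
+    #   -> ['30', 'na']   # a 'stripe' mention would also count as yellow rust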
+    def guess_stage(
+            self,
+            date: Series,
+            country: Series) -> Series:
+        '''Estimates the growth stage reached by each report date, based on
+        typical stage start dates for the report's country.'''
+
+        # TODO: provide typical phenology dates per country
+
+        # applies to the whole country
+        # based on Moin's estimates from vegetative to ripening & maturing
+        # note his Start Date and End Dates are always 30 days apart,
+        # so this sticks to the middle of the range
+        stage_start_dates_bangladesh = {
+                'tillering':'5 Dec', # ~26  days { combined 30 days to complete
+                'boot'     :'31 Dec', # ~4 days {
+                'heading'  :'4 Jan', # 20 days
+                'flowering':'24 Jan', # 10 days
+                'milk'     :'3 Feb', # 10 days
+                'dough'    :'13 Feb', # ~15 days { combined 25 days to complete
+                'maturity' :'28 Mar', # ~10 days {
+                'NA'       :'9 Mar'} # total ~95 days
+
+        # main season wheat in Terai
+        # based on Madan's estimates of min and max duration, taking the mean
+        # from vegetative to ripening & maturing
+        stage_start_dates_nepal_terai = {
+                'tillering':'24 Dec',  # ~ 56 days { combined 66 days to complete
+                'boot'     :'18 Feb',  # ~10 days {
+                'heading'  :'28 Feb',  # 10 days
+                'flowering':'9 Mar',   # 5 days
+                'milk'     :'14 Mar',  # ~12 days { combined 27 days to complete
+                'dough'    :'26 Mar',  # ~15 days {
+                'maturity' :'10 Apr',  # 10 days
+                'NA'       :'20 Apr'} # total ~118 days
+
+        # TODO: Less important: implement main season wheat in mid-hills from Madan's estimates
+        # and determine how to distinguish Terai from mid-hills in news report
+        stage_start_dates_nepal_midhills = {
+                'tillering':'',
+                'boot'     :'',
+                'heading'  :'',
+                'flowering':'',
+                'milk'     :'',
+                'dough'    :'',
+                'maturity' :'',
+                'NA'       :''}
+
+
+        # mainly for the north
+        # assume the same as Nepal Terai as for last year.
+        # TODO: get estimates specific to Pakistan
+        stage_start_dates_pakistan = stage_start_dates_nepal_terai
+
+        # mainly for Haryana district
+        # assume the same as Nepal Terai as for last year.
+        # TODO: get estimates specific to India NW districts
+        stage_start_dates_india = stage_start_dates_nepal_terai
+
+        stage_start_dates_by_country = {
+                'Bangladesh' : stage_start_dates_bangladesh,
+                'Nepal' : stage_start_dates_nepal_terai,
+                'India' : stage_start_dates_india,
+                'Pakistan' : stage_start_dates_pakistan}
+
+        df = DataFrame({'date':date,'country':country})
+        dates_by_entry = country.apply(lambda val: stage_start_dates_by_country[val])
+        df2 = DataFrame.from_records(dates_by_entry.values)
+        df2.index = df.index
+        df3 = concat([df,df2],axis='columns')
+
+        # handle Dec-Jan crossover (is there a neater way of doing this?)
+
+        df3['year'] = df3['date'].apply(lambda di: di.year)
+        df3['lastyear'] = df3['date'].apply(lambda di: di.year-1)
+
+        stages_thisyear = {coln:df3.apply(lambda row: datetime.datetime.strptime(f"{row[coln]} {row['year']}",'%d %b %Y'),axis='columns') for coln in stage_start_dates_bangladesh.keys()}
+        stages_lastyear = {coln:df3.apply(lambda row: datetime.datetime.strptime(f"{row[coln]} {row['lastyear']}",'%d %b %Y'),axis='columns') for coln in stage_start_dates_bangladesh.keys()}
+        df3_thisyear = DataFrame.from_records(stages_thisyear)
+        df3_lastyear = DataFrame.from_records(stages_lastyear)
+
+        # Use knowledge of order of phenological stages to determine which dates are from last year
+        stage_order = ['tillering','boot','heading','flowering','milk','dough','maturity','NA']
+
+        df4 = df3_thisyear[stage_order]
+
+        # check each stage in turn
+        for i,stage in enumerate(stage_order[:-1]):
+            stage_ip1 = stage_order[i+1]
+            # if this stage starts later than the next one, its date belongs to
+            # the previous calendar year, so use last year's date instead
+            df4[stage] = where(df4[stage]<df4[stage_ip1],df4[stage],df3_lastyear[stage])
+
+        # find out which stages start earlier than the survey date
+        date_compare = df4.le(date,axis='rows')
+
+        # get name of latest valid stage
+        stage_series = date_compare.apply(lambda row: row.iloc[::-1].idxmax(),axis='columns')
+
+        return stage_series
+
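+    # Worked example of guess_stage() using the Nepal Terai dates above: for a report
+    # dated 5 Mar, 'tillering' (24 Dec) does not precede 'boot' (18 Feb) within the same
+    # calendar year, so it is shifted back to last year; the stages starting on or before
+    # 5 Mar are then tillering, boot and heading, and the latest of these, 'heading', is
+    # returned.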
+    def reformat_news_reports(self, df_in: DataFrame) -> DataFrame:
+        '''Reformats a dataframe of news reports to match BGRI wheat rust survey
+        data entries (making assumptions where necessary). First checks input is as
+        expected.'''
+
+        # Check contents are as expected
+        cols = df_in.columns
+        expected_cols = ['Lon','Lat','Date','Type','Link','Country','State','District']
+
+        for expected_col in expected_cols:
+            assert expected_col in cols
+
+        # re-order dataframe, with newest entry last
+        df = df_in.copy()
+        df.sort_values('Date',ascending=True,inplace=True)
+
+        assumption_dict = {
+            'field_area' : 1}
+
+        output_dict = {
+            'SubmissionDate' : df['Date'],
+            'start' : df['Date'],
+            'end' : df['Date'],
+            'today' : df['Date'],
+            'deviceid' : 999,
+            'subscriberid' : 999,
+            'imei' : 999,
+            'phonenumber' : 999,
+            'username' : 999,
+            'country_list' : df['Country'],
+            'blast_rust' : 'Rust',
+            'surveyor_name' : 'News report',
+            'institution' : 'na',
+            'mobile_num' : 999,
+            'site_information-survey_site' : 'Farmer field',
+            'site_information-crop' : 'NA',
+            'site_information-field_area' : assumption_dict['field_area'],
+            'site_information-unit_m2' : 999,
+            'site_information-field_size' : 999,
+            'site_information-variety' : 'NA',
+            'site_information-growth_stage' : self.guess_stage(df['Date'],df['Country']),
+            'survey_infromation-location_name' : 999,
+            'survey_infromation-location_blast' : 999,
+            'survey_infromation-sampColor' : 999,
+            'survey_infromation-dateRange' : 999,
+            'survey_infromation-fieldNumber' : 999,
+            'survey_infromation-diseaseIncidencePercentage' : 999,
+            'survey_infromation-severityPercentage' : 999,
+            'survey_infromation-survey_date' : df['Date'].apply(lambda cell: cell.strftime(self.FMT_SHORT)),
+            'survey_infromation-site_name' : '"'+df['District'].astype(str)+', '+df['State'].astype(str)+', '+df['Country'].astype(str)+'"',
+            'survey_infromation-location-Latitude' : df['Lat'],
+            'survey_infromation-location-Longitude' : df['Lon'],
+            'survey_infromation-location-Altitude' : -999,
+            'survey_infromation-location-Accuracy' : -999,
+            'stem_rust-stemrust_incidence' : self.estimate_rust(df['Type'],'stem','incidence'),
+            'stem_rust-Stemrust_severity' : self.estimate_rust(df['Type'],'stem','severity'),
+            'stem_rust-stemrust_host_plant_reaction' : 'na',
+            'leaf_rust-leafrust_incidence' : self.estimate_rust(df['Type'],'leaf','incidence'),
+            'leaf_rust-leafrust_severity' : self.estimate_rust(df['Type'],'leaf','severity'),
+            'leaf_rust-leafrust_host_plant_reaction' : 'na',
+            'yellow_rust-yellowrust_incidence' : self.estimate_rust(df['Type'],'yellow','incidence'),
+            'yellow_rust-yellowrust_severity' : self.estimate_rust(df['Type'],'yellow','severity'),
+            'yellow_rust-yellowrust_host_plant_reaction' : 'na',
+            'septoria-septoria_incidence' : 'na',
+            'septoria-septoria_severity' : 'na',
+            'other_diseases_group-other_diseases' : -999,
+            'score_diseases_count' : -999,
+            'SET-OF-score_diseases' : -999,
+            'samples_collected' : -999,
+            'samples_type' : -999,
+            'sample_size-number_stemrust_live' : -999,
+            'sample_size-number_stemrust_dead_dna' : -999,
+            'sample_size-number_yellowrust_live' : -999,
+            'sample_size-number_yellowrust_dead' : -999,
+            'sample_size-number_leafrust_live' : -999,
+            'sample_size-using_barcode' : -999,
+            'live_stemrust_samples_count' : -999,
+            'SET-OF-live_stemrust_samples' : -999,
+            'dead_stemrust_samples_count' : -999,
+            'SET-OF-dead_stemrust_samples' : -999,
+            'live_yellowrust_samples_count' : -999,
+            'SET-OF-live_yellowrust_samples' : -999,
+            'dead_yellowrust_samples_count' : -999,
+            'SET-OF-dead_yellowrust_samples' : -999,
+            'live_leafrust_samples_count' : -999,
+            'SET-OF-live_leafrust_samples' : -999,
+            'comment' : df['Link'],
+            'meta-instanceID' : -999,
+            'meta-instanceName' : -999,
+            'KEY' : -999}
+
+        df_out = DataFrame(output_dict)
+
+        return df_out
+
+    EMAIL_MSG = """Subject: ARRCC latest scraped media reports
+    
+    Here is an update of what is on the ARRCC media scraper platform.
     
-    Returns a pandas dataframe.'''
-
-    fn = f"{job_dir}/data/NEWS REPORTS.csv"
-
-    dateparse = lambda x: datetime.datetime.strptime(x, '%d-%m-%y')
-
-    df = read_csv(
-            fn,
-            index_col=0,
-            header=0,
-            parse_dates=['Date'],
-            date_parser=dateparse)
-
-    return df
-
-def estimate_rust(
-    description: Series,
-    disease: str,
-    return_type: str) -> Series:
-    '''Works with pandas series'''
-
-    # check for alternative naming
-    if disease in ['yellow','stripe']:
-        
-        any_disease = description.str.contains('yellow') | description.str.contains('stripe')
-
-    else:
-        any_disease = description.str.contains(disease)
-
-    return_dict = {
-            'incidence':'medium',
-            'severity':'30'
-            }
-
-    prevalence = where(any_disease,return_dict[return_type],'na')
-
-    return prevalence
-
-def guess_stage(
-        date: Series,
-        country: Series) -> Series:
-
-    #TODO: provide typical phenology dates per country
-
-    # all of the country
-    # based on Moin's estimates from vegetative to ripening & maturing
-    # note his Start Date and End Dates are always 30 days apart
-    # so, this is sticking to the middle of the range
-    stage_start_dates_bangladesh = {
-            'tillering':'5 Dec', # ~26  days { combined 30 days to complete
-            'boot'     :'31 Dec', # ~4 days {
-            'heading'  :'4 Jan', # 20 days
-            'flowering':'24 Jan', # 10 days
-            'milk'     :'3 Feb', # 10 days 
-            'dough'    :'13 Feb', # ~15 days { combined 25 days to complete
-            'maturity' :'28 Mar', # ~10 days {
-            'NA'       :'9 Mar'} # total ~95 days
+    The latest entry is below. The full set for this season, auto-formatted for input to NAME source calcs in a basic way, is available at:
+    {0}
     
-    # main season wheat in Terai
-    # based on Madan's estimates of min and max duration, taking the mean
-    # from vegetative to ripening & maturing
-    stage_start_dates_nepal_terai = {
-            'tillering':'24 Dec',  # ~ 56 days { combined 66 days to complete
-            'boot'     :'18 Feb',  # ~10 days {
-            'heading'  :'28 Feb',  # 10 days
-            'flowering':'9 Mar',   # 5 days
-            'milk'     :'14 Mar',  # ~12 days { combined 27 days to complete
-            'dough'    :'26 Mar',  # ~15 days {
-            'maturity' :'10 Apr',  # 10 days
-            'NA'       :'20 Apr'} # total ~118 days
-
-    # TODO: Less important: implement main season wheat in mid-hills from Madan's estimates
-    # and determine how to distinguish Terai from mid-hills in news report
-    stage_start_dates_nepal_midhills = {
-            'tillering':'',
-            'boot'     :'',
-            'heading'  :'',
-            'flowering':'',
-            'milk'     :'',
-            'dough'    :'',
-            'maturity' :'',
-            'NA'       :''}
-
-
-    # mainly for the north
-    # assume the same as Nepal Terai as for last year.
-    # TODO: get estimates specific to Pakistan
-    stage_start_dates_pakistan = stage_start_dates_nepal_terai
-
-    # mainly for Haryana district
-    # assume the same as Nepal Terai as for last year.
-    # TODO: get estimates specific to India NW districts
-    stage_start_dates_india = stage_start_dates_nepal_terai
-
-    stage_start_dates_by_country = {
-            'Bangladesh' : stage_start_dates_bangladesh,
-            'Nepal' : stage_start_dates_nepal_terai,
-            'India' : stage_start_dates_india,
-            'Pakistan' : stage_start_dates_pakistan}
+    Check all new webpages for validity and extra info (e.g. field area, variety), then edit and copy any relevant entries to:
+    /storage/app/EWS_prod/regions/SouthAsia/resources/coordinator/assets/SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv
     
-    df = DataFrame({'date':date,'country':country})
-    dates_by_entry = country.apply(lambda val: stage_start_dates_by_country[val])
-    df2 = DataFrame.from_records(dates_by_entry.values)
-    df2.index = df.index
-    df3 = concat([df,df2],axis='columns')
+    Then, check that the survey data processor succeeds with these new entries.
+    
+    Thanks, Jake
+    
+    {1}
+    """
 
-    # handle Dec-Jan crossover (is there a neater way of doing this?)
+    def send_email(
+            self,
+            output_fn: str,
+            data_str: str,
+            email_credential_fn: str,
+            ) -> None:
 
-    df3['year'] = df3['date'].apply(lambda di: di.year)
-    df3['lastyear'] = df3['date'].apply(lambda di: di.year-1)
 
-    stages_thisyear = {coln:df3.apply(lambda row: datetime.datetime.strptime(f"{row[coln]} {row['year']}",'%d %b %Y'),axis='columns') for coln in stage_start_dates_bangladesh.keys()}
-    stages_lastyear = {coln:df3.apply(lambda row: datetime.datetime.strptime(f"{row[coln]} {row['lastyear']}",'%d %b %Y'),axis='columns') for coln in stage_start_dates_bangladesh.keys()}
-    df3_thisyear = DataFrame.from_records(stages_thisyear)
-    df3_lastyear = DataFrame.from_records(stages_lastyear)
-    
-    # Use knowledge of order of phenological stages to determine which dates are from last year
-    stage_order = ['tillering','boot','heading','flowering','milk','dough','maturity','NA']
+        msg = self.EMAIL_MSG.format(output_fn,data_str)
 
-    df4 = df3_thisyear[stage_order]
+        with open(email_credential_fn,'r') as f:
+            gmail_config = json.load(f)
 
-    # check each stage in turn
-    for i,stage in enumerate(stage_order[:-1]): 
-        stage_ip1 = stage_order[i+1] 
-        # if the earlier stage has a later date, use last year's dates
-        df4[stage] = where(df4[stage]<df4[stage_ip1],df4[stage],df3_lastyear[stage])
+        maintainers = gmail_config['toaddrs']
 
-    # find out which stages start earlier than the survey date
-    date_compare = df4.le(date,axis='rows')
-    
-    # get name of latest valid stage
-    stage_series = date_compare.apply(lambda row: row.iloc[::-1].idxmax(),axis='columns')
-
-    return stage_series
-
-def reformat_news_reports(df_in: DataFrame) -> DataFrame:
-    '''Reformats a dataframe of news reports to match BGRI wheat rust survey 
-    data entries (making assumptions where necessary). First checks input is as
-    expected.'''
-
-    #Check contents are as expected
-    cols = df_in.columns
-    expected_cols = ['Lon','Lat','Date','Type','Link','Country','State','District']
-
-    for expected_col in expected_cols:
-        assert expected_col in cols
-
-    # re-order dataframe, with newest entry last
-    df = df_in.copy()
-    df.sort_values('Date',ascending=True,inplace=True)
-
-    assumption_dict = {
-        'field_area' : 1}
-
-    output_dict = {
-        'SubmissionDate' : df['Date'],
-        'start' : df['Date'],
-        'end' : df['Date'],
-        'today' : df['Date'],
-        'deviceid' : 999,
-        'subscriberid' : 999,
-        'imei' : 999,
-        'phonenumber' : 999,
-        'username' : 999,
-        'country_list' : df['Country'],
-        'blast_rust' : 'Rust',
-        'surveyor_name' : 'News report',
-        'institution' : 'na',
-        'mobile_num' : 999,
-        'site_information-survey_site' : 'Farmer field',
-        'site_information-crop' : 'NA',
-        'site_information-field_area' : assumption_dict['field_area'],
-        'site_information-unit_m2' : 999,
-        'site_information-field_size' : 999,
-        'site_information-variety' : 'NA',
-        'site_information-growth_stage' : guess_stage(df['Date'],df['Country']),
-        'survey_infromation-location_name' : 999,
-        'survey_infromation-location_blast' : 999,
-        'survey_infromation-sampColor' : 999,
-        'survey_infromation-dateRange' : 999,
-        'survey_infromation-fieldNumber' : 999,
-        'survey_infromation-diseaseIncidencePercentage' : 999,
-        'survey_infromation-severityPercentage' : 999,
-        'survey_infromation-survey_date' : df['Date'].apply(lambda cell: cell.strftime(FMT_SHORT)),
-        'survey_infromation-site_name' : '"'+df['District'].astype(str)+', '+df['State'].astype(str)+', '+df['Country'].astype(str)+'"',
-        'survey_infromation-location-Latitude' : df['Lat'],
-        'survey_infromation-location-Longitude' : df['Lon'],
-        'survey_infromation-location-Altitude' : -999,
-        'survey_infromation-location-Accuracy' : -999,
-        'stem_rust-stemrust_incidence' : estimate_rust(df['Type'],'stem','incidence'),
-        'stem_rust-Stemrust_severity' : estimate_rust(df['Type'],'stem','severity'),
-        'stem_rust-stemrust_host_plant_reaction' : 'na',
-        'leaf_rust-leafrust_incidence' : estimate_rust(df['Type'],'leaf','incidence'),
-        'leaf_rust-leafrust_severity' : estimate_rust(df['Type'],'leaf','severity'),
-        'leaf_rust-leafrust_host_plant_reaction' : 'na',
-        'yellow_rust-yellowrust_incidence' : estimate_rust(df['Type'],'yellow','incidence'),
-        'yellow_rust-yellowrust_severity' : estimate_rust(df['Type'],'yellow','severity'),
-        'yellow_rust-yellowrust_host_plant_reaction' : 'na',
-        'septoria-septoria_incidence' : 'na',
-        'septoria-septoria_severity' : 'na',
-        'other_diseases_group-other_diseases' : -999,
-        'score_diseases_count' : -999,
-        'SET-OF-score_diseases' : -999,
-        'samples_collected' : -999,
-        'samples_type' : -999,
-        'sample_size-number_stemrust_live' : -999,
-        'sample_size-number_stemrust_dead_dna' : -999,
-        'sample_size-number_yellowrust_live' : -999,
-        'sample_size-number_yellowrust_dead' : -999,
-        'sample_size-number_leafrust_live' : -999,
-        'sample_size-using_barcode' : -999,
-        'live_stemrust_samples_count' : -999,
-        'SET-OF-live_stemrust_samples' : -999,
-        'dead_stemrust_samples_count' : -999,
-        'SET-OF-dead_stemrust_samples' : -999,
-        'live_yellowrust_samples_count' : -999,
-        'SET-OF-live_yellowrust_samples' : -999,
-        'dead_yellowrust_samples_count' : -999,
-        'SET-OF-dead_yellowrust_samples' : -999,
-        'live_leafrust_samples_count' : -999,
-        'SET-OF-live_leafrust_samples' : -999,
-        'comment' : df['Link'],
-        'meta-instanceID' : -999,
-        'meta-instanceName' : -999,
-        'KEY' : -999}
-
-    df_out = DataFrame(output_dict)
-
-    return df_out
-
-EMAIL_MSG = """Subject: ARRCC latest scraped media reports
-
-Here is an update of what is on the ARRCC media scraper platform.
-
-The latest entry is below. The full set for this season, auto-formatted for input to NAME source calcs in a basic way, is available at:
-{0}
-
-Check all new webpages for validity and extra info (e.g. field area, variety), then edit and copy any relevant entries to:
-/storage/app/EWS_prod/regions/SouthAsia/resources/coordinator/assets/SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv
-
-Then, check that the survey data processor succeeds with these new entries.
-
-Thanks, Jake
-
-{1}
-"""
-
-def send_email(
-        output_fn: str,
-        data_str: str,
-        email_credential_fn: str,
-        ) -> None:
-
-
-    msg = EMAIL_MSG.format(output_fn,data_str)
-
-    with open(email_credential_fn,'r') as f: 
-        gmail_config = json.load(f)
-    
-    maintainers = gmail_config['toaddrs']
-
-    # Create a secure SSL context
-    context = ssl.create_default_context(cafile=certifi.where())
-        
-    # It is indicated that gmail requires port 465 for SMTP_SSL, otherwise port
-    # 587 with .starttls() from
-    # https://realpython.com/python-send-email/#sending-a-plain-text-email I
-    # think port 587 is meant to make sense for the typical python logging
-    # smtphandler but that doesn't apply here
-    port = 465 # gmail_config['port']
-
-    with smtplib.SMTP_SSL(gmail_config['host'], port, context=context) as server:
-        
-        server.login(gmail_config['user'], gmail_config['pass'])
-        
-        server.sendmail(gmail_config['user'], maintainers, msg)
-
-        logger.info('Message sent!')
-
-    return
-
-def process_in_job_media_scraper(
-        jobPath: str,
-        status: jobStatus,
-        config: dict,
-        component: str = 'Scraper'
-        ) -> None:
-    """
-    1) Get a latest copy of the news report data from dashboard URL.
-    2) TODO: Reformat to match BGRI wheat rust survey data entries .
-    3) Filter to latest news reports.
-    4) Output to csv.
-    5) email any new reports to maintainers, so they can copy into 
-       SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv where appropriate.
-    """
-    
-    config_scraper = config['Scraper'].copy()
+        # Create a secure SSL context
+        context = ssl.create_default_context(cafile=certifi.where())
 
-    dateString = config['StartString']
+        # Gmail requires port 465 for SMTP_SSL (or port 587 with .starttls()), per
+        # https://realpython.com/python-send-email/#sending-a-plain-text-email
+        # Port 587 suits the typical python logging SMTPHandler, but that doesn't apply here.
+        port = 465 # gmail_config['port']
 
-    logger.info("1) Getting a latest copy of the news report data from dashboard URL")
-    
-    url = config['Scraper']['URL']
+        with smtplib.SMTP_SSL(gmail_config['host'], port, context=context) as server:
 
-    get_news_reports_from_url(jobPath,url)
+            server.login(gmail_config['user'], gmail_config['pass'])
 
-    reports_in = read_news_reports(jobPath)
+            server.sendmail(gmail_config['user'], maintainers, msg)
 
-    # TODO: Reformat
-    logger.info("2) Reformat to match BGRI wheat rust survey data entries")
-    
-    # (making assumptions where necessary)
-    reports = reformat_news_reports(reports_in)
+            self.logger.info('Message sent!')
 
-    logger.info("3) Filter to latest news reports")
+        return
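+    # Illustrative only: send_email() assumes email_credential_fn (taken from the
+    # EMAIL_CRED environment variable in process_in_job_media_scraper) is a JSON file
+    # containing at least the keys read above; the values below are placeholders:
+    #   {"host": "smtp.gmail.com", "user": "sender@example.com",
+    #    "pass": "<app password>", "toaddrs": ["maintainer@example.com"]}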
 
-    this_season_starts_str = config['Scraper']['seasonStartString'] # '20201201'
-    this_season_starts = datetime.datetime.strptime(this_season_starts_str,'%Y%m%d')
+    def process_in_job_media_scraper(
+            self,
+            jobPath: str,
+            status: jobStatus,
+            config: dict,
+            component: str = 'Scraper'
+            ) -> None:
+        """
+        1) Get a latest copy of the news report data from dashboard URL.
+        2) TODO: Reformat to match BGRI wheat rust survey data entries .
+        3) Filter to latest news reports.
+        4) Output to csv.
+        5) email any new reports to maintainers, so they can copy into
+           SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv where appropriate.
+        """
 
-    latest_reports = reports[reports['SubmissionDate']>=this_season_starts]
+        config_scraper = config['Scraper'].copy()
 
-    # TODO: Low priority: Determine differences from last reformatted set of news reports
+        dateString = config['StartString']
 
-    logger.info("4) Output to csv")
+        logger.info("1) Getting a latest copy of the news report data from dashboard URL")
 
-    output_fn = f"{jobPath}/latest_reports_as_proxy_surveys.csv"
+        url = config['Scraper']['URL']
 
-    # date format conforms to format used in SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv
-    latest_reports.to_csv(
-            output_fn,
-            index=False,
-            date_format=FMT)
-
-    is_sending_email = config['Scraper'].get('SendEmail',True)
-
-    if is_sending_email == True:
-
-        logger.info("5) email any new reports to maintainers, so they can copy into")
-        logger.info("SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv where appropriate")
-
-        selected_columns = [
-                'SubmissionDate',
-                'site_information-field_area',
-                'site_information-growth_stage',
-                'survey_infromation-site_name',
-                'country_list',
-                'stem_rust-stemrust_incidence',
-                'stem_rust-Stemrust_severity',
-                'yellow_rust-yellowrust_incidence',
-                'yellow_rust-yellowrust_severity',
-                'leaf_rust-leafrust_incidence',
-                'leaf_rust-leafrust_severity',
-                'comment']
-
-        # remove pandas display limitation, so full web address is shown from comment
-        set_option('display.max_colwidth', None)
-        latest_report_selection = latest_reports.iloc[-1,:].loc[selected_columns].__str__()
-        
-        # get the email credentials file path from the environment variables
-        assert 'EMAIL_CRED' in os.environ
-        email_credential_fn = os.environ['EMAIL_CRED']
-        assert os.path.exists(email_credential_fn)
-
-        send_email(
-                output_fn,
-                latest_report_selection,
-                email_credential_fn = email_credential_fn)
+        self.get_news_reports_from_url(jobPath,url)
+
+        reports_in = self.read_news_reports(jobPath)
+
+        # TODO: Reformat
+        logger.info("2) Reformat to match BGRI wheat rust survey data entries")
+
+        # (making assumptions where necessary)
+        reports = self.reformat_news_reports(reports_in)
 
-    proc_out = {}
-    # Output files available for upload
-    proc_out['output'] = None
-    # Processing files available for clearing
-    proc_out['clearup'] = None
+        logger.info("3) Filter to latest news reports")
 
-    return proc_out
+        this_season_starts_str = config['Scraper']['seasonStartString'] # '20201201'
+        this_season_starts = datetime.datetime.strptime(this_season_starts_str,'%Y%m%d')
+
+        latest_reports = reports[reports['SubmissionDate']>=this_season_starts]
+
+        # TODO: Low priority: Determine differences from last reformatted set of news reports
+
+        logger.info("4) Output to csv")
+
+        output_fn = f"{jobPath}/latest_reports_as_proxy_surveys.csv"
+
+        # date format conforms to format used in SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv
+        latest_reports.to_csv(
+                output_fn,
+                index=False,
+                date_format=self.FMT)
+
+        is_sending_email = config['Scraper'].get('SendEmail',True)
+
+        if is_sending_email == True:
+
+            logger.info("5) email any new reports to maintainers, so they can copy into")
+            logger.info("SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv where appropriate")
+
+            selected_columns = [
+                    'SubmissionDate',
+                    'site_information-field_area',
+                    'site_information-growth_stage',
+                    'survey_infromation-site_name',
+                    'country_list',
+                    'stem_rust-stemrust_incidence',
+                    'stem_rust-Stemrust_severity',
+                    'yellow_rust-yellowrust_incidence',
+                    'yellow_rust-yellowrust_severity',
+                    'leaf_rust-leafrust_incidence',
+                    'leaf_rust-leafrust_severity',
+                    'comment']
+
+            # remove pandas display limitation, so full web address is shown from comment
+            set_option('display.max_colwidth', None)
+            latest_report_selection = str(latest_reports.iloc[-1,:].loc[selected_columns])
+
+            # get the email credentials file path from the environment variables
+            assert 'EMAIL_CRED' in os.environ
+            email_credential_fn = os.environ['EMAIL_CRED']
+            assert os.path.exists(email_credential_fn)
+
+            self.send_email(
+                    output_fn,
+                    latest_report_selection,
+                    email_credential_fn = email_credential_fn)
+
+        proc_out = {}
+        # Output files available for upload
+        proc_out['output'] = None
+        # Processing files available for clearing
+        proc_out['clearup'] = None
+
+        return proc_out
+
+
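+# Standalone entry point; argument handling and job setup are assumed to be provided by
+# Processor.run_processor() in the base class.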
+if __name__ == '__main__':
+    processor = ProcessorScraper()
+    processor.run_processor("Scraper")