From 323da61acd0dcd7330370fa2b25f2a0b248952ec Mon Sep 17 00:00:00 2001
From: Eduardo Gonzalez Solares <eglez@ast.cam.ac.uk>
Date: Fri, 10 May 2019 11:14:31 +0100
Subject: [PATCH] Add preprocessing step

---
 stpt_pipeline/main.py       | 130 ++++++++------------------
 stpt_pipeline/preprocess.py | 176 ++++++++++++++++++++++++++++++++++++
 2 files changed, 215 insertions(+), 91 deletions(-)
 create mode 100644 stpt_pipeline/preprocess.py

diff --git a/stpt_pipeline/main.py b/stpt_pipeline/main.py
index 8ffe81f..cbbe42a 100644
--- a/stpt_pipeline/main.py
+++ b/stpt_pipeline/main.py
@@ -2,7 +2,6 @@ import logging
 from os import listdir
 from pathlib import Path
 
-import dask.array as da
 import numpy as np
 from dask import delayed  # noqa: F401
 from distributed import Client, wait
@@ -10,69 +9,12 @@ from scipy.ndimage import geometric_transform
 
 from .chi_functions import find_overlap_conf, read_mosaicifile_stpt
 from .mosaic_functions import find_delta, get_img_cube, get_img_list, get_mosaic_file
+from .preprocess import preprocess
 from .settings import Settings
-from .stpt_displacement import defringe, get_coords, magic_function
 
 log = logging.getLogger('owl.daemon.pipeline')
 
 
-def read_flatfield(flat_file: Path):
-    """[summary]
-
-    Parameters
-    ----------
-    flat_file : [type]
-        [description]
-
-    Returns
-    -------
-    [type]
-        [description]
-    """
-    log.info('Reading flatfield %s', flat_file)
-    if Settings.do_flat:
-        fl = np.load(flat_file)
-        channel = Settings.channel_to_use - 1
-        flat = fl[:, :, channel]
-        if Settings.do_defringe:
-            fr_img = defringe(flat)
-            nflat = (flat - fr_img) / np.median(flat)
-        else:
-            nflat = flat / np.median(flat)
-        nflat[nflat < 0.5] = 1.0
-    else:
-        nflat = 1.0
-    return nflat
-
-
-def list_directories(root_dir: Path):
-    """[summary]
-
-    This lists all the subdirectories. Once there is an
-    standarized naming convention this will have to be edited,
-    as at the moment looks for dir names with the '4t1' string
-    on them, as all the experiments had this
-
-    Parameters
-    ----------
-    root_dir : [type]
-        [description]
-
-    Returns
-    -------
-    [type]
-        [description]
-    """
-    dirs = []
-    for this_file in listdir(root_dir):
-        if this_file.find('4t1') > -1:
-            if this_file.find('.txt') > -1:
-                continue
-            dirs.append(root_dir / this_file)
-    dirs.sort()
-    return dirs
-
-
 def main(*, root_dir: Path, flat_file: Path, output_dir: Path):
     """[summary]
 
@@ -85,13 +27,14 @@ def main(*, root_dir: Path, flat_file: Path, output_dir: Path):
     output_dir : Path
         [description]
     """
-    client = Client.current()
+    #  client = Client.current()
+    z = preprocess(root_dir, flat_file, output_dir)
+    sections = z.attrs['sections']
+    for section in range(sections):
+        log.info('Processing section %s', section)
 
-    nflat = delayed(read_flatfield)(flat_file)
-
-    log.info('Getting directory listing %s', root_dir)
-    dirs = list_directories(root_dir)
 
+"""
     for this_dir in dirs:
         log.info('Processing directory %s', this_dir)
         # read mosaic file, get delta in microns
@@ -154,6 +97,8 @@ def main(*, root_dir: Path, flat_file: Path, output_dir: Path):
         dx_mat = np.zeros((len(dx), len(dx))) - 9999
         dy_mat = np.zeros((len(dx), len(dx))) - 9999
         #
+        results = []
+        data_desc = []
         for i in range(len(dx0)):
             #
             # This is more or less the distance between images in detectors,
@@ -164,8 +109,6 @@ def main(*, root_dir: Path, flat_file: Path, output_dir: Path):
             r[i] = 100
             i_t = np.where(r <= 1)[0]
             #
-            results = []
-            data_desc = []
             for this_img in i_t:
                 if dx_mat[i, this_img] != -9999:
                     # already done
@@ -236,28 +179,33 @@ def main(*, root_dir: Path, flat_file: Path, output_dir: Path):
                 results.append(res)
                 data_desc.append([ORIENTATION, ind_ref, ind_obj])
 
-            futures = client.compute(results)
-            wait(futures)
-            for dd, fut in zip(data_desc, futures):
-                dx, dy, mer = fut.result()
-                ORIENTATION, ind_ref, ind_obj = dd
-                log.info('%s %s', dx, dy)
-
-                if ORIENTATION == 'Y':
-                    dx_mat[ind_ref, ind_obj] = dx
-                    dy_mat[ind_ref, ind_obj] = dy
-                    #
-                    dx_mat[ind_obj, ind_ref] = -dx
-                    dy_mat[ind_obj, ind_ref] = -dy
-                if ORIENTATION == 'X':
-                    dx_mat[ind_ref, ind_obj] = dy
-                    dy_mat[ind_ref, ind_obj] = -dx
-                    #
-                    dx_mat[ind_obj, ind_ref] = -dy
-                    dy_mat[ind_obj, ind_ref] = dx
-
-        out = output_dir / this_dir.name
-        out.mkdir(exist_ok=True)
-        log.debug('Saving results to %s', out)
-        np.save(out / 'desp_dist_x', dx_mat)
-        np.save(out / 'desp_dist_y', dy_mat)
+            if len(results) > 5:
+                futures = client.compute(results)
+                wait(futures)
+                for dd, fut in zip(data_desc, futures):
+                    dx, dy, mer = fut.result()
+                    ORIENTATION, ind_ref, ind_obj = dd
+                    log.info('%s %s', dx, dy)
+
+                    if ORIENTATION == 'Y':
+                        dx_mat[ind_ref, ind_obj] = dx
+                        dy_mat[ind_ref, ind_obj] = dy
+                        #
+                        dx_mat[ind_obj, ind_ref] = -dx
+                        dy_mat[ind_obj, ind_ref] = -dy
+                    if ORIENTATION == 'X':
+                        dx_mat[ind_ref, ind_obj] = dy
+                        dy_mat[ind_ref, ind_obj] = -dx
+                        #
+                        dx_mat[ind_obj, ind_ref] = -dy
+                        dy_mat[ind_obj, ind_ref] = dx
+
+                results, data_desc = [], []
+
+                out = output_dir / this_dir.name
+                out.mkdir(exist_ok=True)
+                log.debug('Saving results to %s', out)
+                np.save(out / 'desp_dist_x', dx_mat)
+                np.save(out / 'desp_dist_y', dy_mat)
+
+"""
diff --git a/stpt_pipeline/preprocess.py b/stpt_pipeline/preprocess.py
new file mode 100644
index 0000000..046b83b
--- /dev/null
+++ b/stpt_pipeline/preprocess.py
@@ -0,0 +1,176 @@
+import logging
+import re
+from os import listdir
+from pathlib import Path
+
+import numpy as np
+import zarr
+from dask import delayed
+from distributed import Client, wait
+from scipy.ndimage import geometric_transform
+
+from imaxt_image.image import TiffImage
+from stpt_pipeline.utils import get_coords
+
+from .mosaic_functions import get_mosaic_file
+from .settings import Settings
+from .stpt_displacement import defringe, magic_function
+
+log = logging.getLogger('owl.daemon.pipeline')
+
+
+def read_flatfield(flat_file: Path):
+    """[summary]
+
+    Parameters
+    ----------
+    flat_file : [type]
+        [description]
+
+    Returns
+    -------
+    [type]
+        [description]
+    """
+    log.info('Reading flatfield %s', flat_file)
+    if Settings.do_flat:
+        fl = np.load(flat_file)
+        channel = Settings.channel_to_use - 1
+        flat = fl[:, :, channel]
+        if Settings.do_defringe:
+            fr_img = defringe(flat)
+            nflat = (flat - fr_img) / np.median(flat)
+        else:
+            nflat = flat / np.median(flat)
+        nflat[nflat < 0.5] = 1.0
+    else:
+        nflat = 1.0
+    return nflat
+
+
+def list_directories(root_dir: Path):
+    """[summary]
+
+    This lists all the subdirectories. Once there is an
+    standarized naming convention this will have to be edited,
+    as at the moment looks for dir names with the '4t1' string
+    on them, as all the experiments had this
+
+    Parameters
+    ----------
+    root_dir : [type]
+        [description]
+
+    Returns
+    -------
+    [type]
+        [description]
+    """
+    dirs = []
+    for this_file in listdir(root_dir):
+        if this_file.find('4t1') > -1:
+            if this_file.find('.txt') > -1:
+                continue
+            dirs.append(root_dir / this_file)
+    dirs.sort()
+    return dirs
+
+
+def save_image(filename, z, section, first_offset, layers):
+    img = TiffImage(filename)
+    match = (
+        re.compile(r'(?P<offset>\d+)_(?P<channel>\d\d).tif$')
+        .search(filename.name)
+        .groupdict()
+    )
+    offset, channel = int(match['offset']), match['channel']
+    fov = (offset - first_offset) // layers
+    zslice = (offset - first_offset) - fov * layers
+    g = z.create_group(f'section={section}/fov={fov}/z={zslice}/channel={channel}')
+    d = g.create_dataset('raw', data=img.asarray(), chunks=False)
+    print(d)
+    return d
+
+
+def geom(d, flat=1):
+    shapes = (Settings.x_max - Settings.x_min, Settings.y_max - Settings.y_min)
+    cropped = magic_function(d[:], flat=flat)
+    new = geometric_transform(
+        cropped.astype('float32'),
+        get_coords,
+        output_shape=shapes,
+        extra_arguments=(Settings.cof_dist, shapes[0] * 0.5, shapes[0] * 1.0),
+        mode='constant',
+        cval=0.0,
+        order=1,
+        prefilter=False,
+    )
+
+    fh = zarr.group(store=d.store)
+    path = d.path.replace('/raw', '')
+    g = fh[path]
+    dd = g.create_dataset(
+        'geom', data=new.astype('float32'), chunks=False, overwrite=True
+    )
+    print(dd)
+    return dd
+
+
+def preprocess(root_dir: Path, flat_file: Path, output_dir: Path):
+    """[summary]
+    
+    Parameters
+    ----------
+    root_dir : Path
+        [description]
+    flat_file : Path
+        [description]
+    output_dir : Path
+        [description]
+    
+    Returns
+    -------
+    [type]
+        [description]
+    """
+
+    nflat = delayed(read_flatfield)(flat_file).persist()
+
+    out = f'{output_dir / root_dir.name}.zarr'
+    log.info('Storing images in %s', out)
+    z = zarr.group(out, 'w')
+
+    dirs = list_directories(root_dir)
+    for d in dirs:
+        # TODO: All this should be metadata
+        section = re.compile(r'\d\d\d\d$').search(d.name).group()
+        mosaic_file = get_mosaic_file(d)
+        res = open(d / mosaic_file, 'r').read()
+        mrows = re.compile(r'mrows:(\d+)\n').search(res).groups()[0]
+        mcolumns = re.compile(r'mcolumns:(\d+)\n').search(res).groups()[0]
+        mrows, mcolumns = int(mrows), int(mcolumns)
+        fovs = mrows * mcolumns
+        files = sorted(list(d.glob('*.tif')))
+        files_ch1 = sorted(list(d.glob('*_01.tif')))
+        layers = len(files_ch1) // fovs
+        first_offset = int(
+            re.compile(r'-(\d+)_\d+.tif').search(files[0].name).groups()[0]
+        )
+
+        res = []
+        for f in d.glob('*.tif'):
+            r = delayed(save_image)(f, z, section, first_offset, layers)
+            g = delayed(geom)(r, flat=nflat)
+            res.append(g)
+
+        client = Client.current()
+        fut = client.compute(res)
+        wait(fut)
+
+    # Write metadata
+    z.attrs['sections'] = len(dirs)  # TODO: Read from mosaic file
+    z.attrs['fovs'] = fovs
+    z.attrs['mrows'] = mrows
+    z.attrs['mcolumns'] = mcolumns
+    z.attrs['layers'] = layers
+    return z
-- 
GitLab