From c67ab60de95008178e26817946990382751d91d8 Mon Sep 17 00:00:00 2001
From: Bernardo TOD <bernardo.tod52@gmail.com>
Date: Thu, 23 May 2019 07:52:45 +0200
Subject: [PATCH 1/1] Initialize

---
 formatdicom.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++
 helpers.py     |   2 +
 readme.txt     |  10 +++++
 regularjson.py |  99 +++++++++++++++++++++++++++++++++++++++++++++
 topng.py       |  97 ++++++++++++++++++++++++++++++++++++++++++++
 tosampledir.py |  18 +++++++++
 totraindir.py  |  77 +++++++++++++++++++++++++++++++++++
 7 files changed, 410 insertions(+)
 create mode 100644 formatdicom.py
 create mode 100644 helpers.py
 create mode 100644 readme.txt
 create mode 100644 regularjson.py
 create mode 100644 topng.py
 create mode 100644 tosampledir.py
 create mode 100644 totraindir.py

diff --git a/formatdicom.py b/formatdicom.py
new file mode 100644
index 0000000..54aa277
--- /dev/null
+++ b/formatdicom.py
@@ -0,0 +1,107 @@
+import os
+import png
+import dicom
+import argparse
+
+
+def mri_to_png(mri_file, png_file):
+    """ Function to convert from a DICOM image to png
+
+        @param mri_file: An opened file like object to read the dicom data
+        @param png_file: An opened file like object to write the png data
+
+        The grey levels are rescaled linearly so that the brightest pixel
+        becomes 255; an image whose maximum is 0 is written as all black.
+    """
+
+    # Extracting data from the mri file
+    plan = dicom.read_file(mri_file)
+    # pixel_array is iterated row by row, so shape is (rows, columns)
+    shape = plan.pixel_array.shape
+
+    # Copy the rows into plain lists and find the brightest value (at least 0)
+    image_2d = [list(row) for row in plan.pixel_array]
+    max_val = max([0] + [max(row) for row in image_2d if row])
+
+    # Rescaling grey scale between 0-255
+    # (a maximum of 0 would divide by zero, so divide by 1 in that case)
+    divisor = float(max_val) if max_val > 0 else 1.0
+    image_2d_scaled = [
+        [int((float(col) / divisor) * 255.0) for col in row]
+        for row in image_2d
+    ]
+
+    # Writing the PNG file
+    # png.Writer takes (width, height), i.e. (columns, rows);
+    # passing the rows first only works for square images
+    w = png.Writer(shape[1], shape[0], greyscale=True)
+    w.write(png_file, image_2d_scaled)
+
+
+def convert_file(mri_file_path, png_file_path):
+    """ Function to convert an MRI binary file to a
+        PNG image file.
+
+        @param mri_file_path: Full path to the mri file (must exist)
+        @param png_file_path: Full path to the png file (must not exist yet)
+    """
+
+    # Making sure that the mri file exists
+    if not os.path.exists(mri_file_path):
+        raise Exception('File "%s" does not exists' % mri_file_path)
+
+    # Making sure the png file does not exist
+    if os.path.exists(png_file_path):
+        raise Exception('File "%s" already exists' % png_file_path)
+
+    # Context managers close both files even when the conversion raises,
+    # so a folder run does not leak a handle for every file that fails.
+    with open(mri_file_path, 'rb') as mri_file:
+        with open(png_file_path, 'wb') as png_file:
+            # Both streams are binary: DICOM bytes in, PNG bytes out
+            mri_to_png(mri_file, png_file)
+
+
+def convert_folder(mri_folder, png_folder):
+    """ Mirror the folder tree under mri_folder into png_folder, converting
+        every file found on the way to '<file name>.png'. A file that fails
+        to convert is reported with a FAIL> line and the walk continues.
+    """
+
+    # Create the root of the png directory structure
+    os.makedirs(png_folder)
+
+    # Recursively traverse all sub-folders in the path
+    for current_dir, _subdirs, _files in os.walk(mri_folder):
+        # Destination folder mirroring current_dir's place in the tree
+        rel_path = os.path.relpath(current_dir, mri_folder)
+        png_folder_path = os.path.join(png_folder, rel_path)
+        for entry in os.listdir(current_dir):
+            mri_file_path = os.path.join(current_dir, entry)
+            # Anything that is not an actual file is skipped
+            if not os.path.isfile(mri_file_path):
+                continue
+            # The destination folder is only created once it gets a file
+            if not os.path.exists(png_folder_path):
+                os.makedirs(png_folder_path)
+            png_file_path = os.path.join(png_folder_path, '%s.png' % entry)
+
+            try:
+                convert_file(mri_file_path, png_file_path)
+                print('SUCCESS>', mri_file_path, '-->', png_file_path)
+            except Exception as e:
+                print('FAIL>', mri_file_path, '-->', png_file_path, ':', e)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Convert a dicom MRI file to png")
+    parser.add_argument('-f', action='store_true')  # -f: both paths are folders, handled by convert_folder
+    parser.add_argument('dicom_path', help='Full path to the mri file')
+    parser.add_argument('png_path', help='Full path to the generated png file')
+
+    args = parser.parse_args()
+    print(args)  # echoes the parsed arguments before converting
+    if args.f:
+        convert_folder(args.dicom_path, args.png_path)
+    else:
+        convert_file(args.dicom_path, args.png_path)
\ No newline at end of file
diff --git a/helpers.py b/helpers.py
new file mode 100644
index 0000000..9784900
--- /dev/null
+++ b/helpers.py
@@ -0,0 +1,2 @@
+def getdir(filepath):  # directory part of filepath, always ending with '/'
+	return filepath[:filepath.rfind('/') + 1] or './'  # no '/' at all: current directory, not the root '/'
diff --git a/readme.txt b/readme.txt
new file mode 100644
index 0000000..28b12aa
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,10 @@
+# ----------   README
+
+# Edit constants in regularjson.py file, and execute it 
+python3 regularjson.py
+
+# Edit constants in totraindir.py file, and execute it 
+python3 totraindir.py
+
+# Edit paths in tosampledir.py file, and execute it
+python3 tosampledir.py
\ No newline at end of file
diff --git a/regularjson.py b/regularjson.py
new file mode 100644
index 0000000..fa89890
--- /dev/null
+++ b/regularjson.py
@@ -0,0 +1,99 @@
+"""
+by @res
+
+1) Recursively read json_GT* and collect "Feature Refs" & labels (0: Infarctus, 1: No infarctus)
+2) store "Feature Refs" & labels
+    Format:
+        {
+            nb_infarct: 2,
+            infarcts: [],
+            no_infarcts: [],
+        }
+"""
+from os import listdir as ls
+from os.path import getsize as size, join
+from pprint import pprint
+
+
+# constants
+# json
+#   |--> json_GT
+#   |--> json_GT_part2
+RT_PATH = '../../Data/json/'
+JSON_GTS = ['json_GT', 'json_GT_part2']
+INFA_STR = 'Infa'
+
+# Globals
+featurerefs = []
+labellist = []
+details = []
+
+from os.path import isfile, join
+
+def search(path, name, recursive=True, fsize=True):
+    """
+    Return the first entry under `path` whose name contains `name`, or None.
+    :param recursive: also look inside sub-directories (depth-first)
+    :param fsize: include the match's size (getsize) when True, else None
+    :return: {'path': p, 'size': size or None}; None when nothing matches
+    NOTE: recursive and fsize are forwarded to the nested calls as well.
+    """
+    for d in ls(path):
+        p = join(path, d)
+        if name in d:
+            return {'path': p, 'size': size(p) if fsize else None}
+        elif recursive and not isfile(p):# only enter if it's a directory
+            res = search(p, name, recursive, fsize)# stop if found
+            if res:
+                return res
+
+def merge(dirlist, root='', indice='Case'):
+    # Names containing `indice` from every directory of dirlist (each taken
+    # relative to root), gathered into one sorted list.
+    return sorted(
+        e for d in dirlist for e in ls(join(root, d)) if indice in e
+    )
+
+def mergejsons():  # sorted 'Case' entry names found in every JSON_GTS directory under RT_PATH
+    return merge(JSON_GTS, RT_PATH)
+
+def featurerefs():
+    """
+        Rebind the module-level name `featurerefs` to the sorted list of
+        feature references returned by mergejsons() (built from JSON_GTS).
+    :return: None
+    """
+    global featurerefs
+    # NOTE(review): this function shares its name with the global it assigns,
+    # so after the first call `featurerefs` is a list and can no longer be
+    # called -- a second featurerefs() would raise TypeError.
+
+    featurerefs = sorted(mergejsons())
+
+def label(fref):  # -> (0, size) when an INFA_STR entry is found under fref's entry, else (1, None)
+    res = search(RT_PATH, fref)  # NOTE(review): None when fref is not found; the next line would then raise TypeError
+    b = search(res['path'], INFA_STR)
+    return (0, b['size']) if b else (1, None)
+
+def labels():  # append label()'s (label, detail) pair for every feature reference
+    global labellist, details
+    for name in featurerefs:  # expects featurerefs() to have run, so the name is a list here
+        lab, detail = label(name)
+        labellist.append(lab)
+        details.append(detail)
+
+
+
+if __name__ == '__main__':
+    featurerefs()  # rebinds the name `featurerefs` to the list of references
+    labels()  # fills labellist and details from that list
+
+    print('featurerefs:', len(featurerefs), featurerefs)
+    print('labels:', len(labellist), labellist)
+    print('details:', len(details), details)
+
+    # print( label('Case_02') )
+    # print( search(RT_PATH, INFA_STR) )
+
+
+
diff --git a/topng.py b/topng.py
new file mode 100644
index 0000000..9651446
--- /dev/null
+++ b/topng.py
@@ -0,0 +1,97 @@
+import cv2
+import os
+from os.path import isdir, join
+from os import mkdir
+import pydicom
+from pprint import pprint
+import numpy as np
+import pathlib
+
+from decimal import Decimal as d, ROUND_HALF_UP as rhu
+
+PNG16_MAX = pow(2, 16) - 1 # largest value of a 16-bit sample (65535)
+PNG8_MAX = pow(2, 8) - 1 # largest value of an 8-bit sample (255): n bits span 0 .. 2**n - 1
+
+INPUT_DIR = '../../Data/Images_anonymous/Case_0449/'
+OUT_DIR = './generated/'
+#os.mkdir(outdir)
+
+def map16(array):# multiplies every value by 16; not called in this file, kept because it can be useful in future
+	return array * 16
+
+# def dround(n):
+# 	return d(str(n)).quantize(d('1'), rounding=rhu)# safe round without
+
+def affine(Mat, ab, cd):
+	"""
+		In-place affine rescale of Mat from the interval ab=(a, b) onto cd=(c, d),
+		e.g. ab=(2, 384) and cd=(0, 1024); results are rounded and floored at 0.
+		An empty 'from' interval (a == b) is treated as width 1 (no division by zero).
+	"""
+	a, b = float(ab[0]), float(ab[1])
+	c, d = float(cd[0]), float(cd[1])
+	span = (b - a) or 1.0 # a == b: avoid dividing by zero
+	# one vectorised float64 pass: integer pixel dtypes could overflow on (x-a) * (d-c)
+	scaled = (Mat.astype(np.float64) - a) * (d - c) / span + c
+	Mat[...] = np.maximum(0, np.round(scaled))
+
+
+def getdir(filepath):  # directory part of filepath, always ending with '/'
+	return filepath[:filepath.rfind('/') + 1] or './'  # no '/' at all: current directory, not the root '/'
+
+def topng(inputfile, outfile=None, overwrite=True):# write the dicom image in inputfile as '<outfile or inputfile>.png'
+	try:
+		dicimg = pydicom.read_file(inputfile) # read dicom image
+	except pydicom.errors.InvalidDicomError as e:
+		# @TODO: log; for now an unreadable input is skipped silently (returns None)
+		return
+	img = dicimg.pixel_array# get image array (12-bit samples per the original note -- TODO confirm)
+
+	# debug trace <<
+	print('img', img)
+	# (the statement above dumps the raw pixel array to stdout on every call)
+	# NOTE(review): img is rescaled in place further down, so it keeps the
+	# dtype pydicom gave it; whether that dtype can hold the chosen target
+	# maximum is not checked here -- confirm against the real data.
+	# NOTE(review): consider removing the print once the conversion is
+	# trusted.
+	# debug trace >>
+
+	# affine rescale towards the PNG value range; affine() modifies img in place
+	maxdepth = img.max()
+	# target maximum: PNG16_MAX when the image maximum exceeds PNG8_MAX, else PNG8_MAX
+	affine(img, 
+		(img.min(), maxdepth),
+		(0, PNG16_MAX if maxdepth > PNG8_MAX else PNG8_MAX)
+	)
+	
+	savepath = (outfile or inputfile) + '.png'
+	savedir = getdir(savepath)
+	if overwrite and not isdir( savedir ):
+		pathlib.Path(savedir).mkdir(parents=True, exist_ok=True)
+
+
+	# NOTE(review): `overwrite` only gates the directory creation above; it
+	# does not protect an existing file at savepath from the write below.
+	# NOTE(review): when overwrite is False and savedir is missing, nothing
+	# creates it before the write.
+	# NOTE(review): the result of cv2.imwrite is not checked, so a failed
+	# write would go unnoticed -- confirm whether that is acceptable.
+	# IMWRITE_PNG_COMPRESSION 0 presumably means "no compression" (larger
+	# files) -- confirm against the OpenCV documentation.
+
+	cv2.imwrite(savepath, img, [cv2.IMWRITE_PNG_COMPRESSION, 0]) # write png image
+
+
+def topngs(inputdir, outdir):
+	"""
+		Convert every entry directly inside inputdir (expected to be dicom files) with topng,
+		writing each result under outdir with the same file name plus '.png'.
+	"""
+	for f in os.listdir(inputdir):
+		# join() also copes with an inputdir given without a trailing '/'
+		topng( join(inputdir, f), join(outdir, f) )
+
+if __name__ == '__main__':
+	# topngs( INPUT_DIR, join(OUT_DIR, INPUT_DIR.split('/')[-2]) )
+	topng(INPUT_DIR+'Image00001', OUT_DIR + INPUT_DIR.split('/')[-2] +'-Image00001') # one hard-coded image; the commented call above is the whole-directory variant
diff --git a/tosampledir.py b/tosampledir.py
new file mode 100644
index 0000000..87effac
--- /dev/null
+++ b/tosampledir.py
@@ -0,0 +1,18 @@
+from os import listdir as ls
+from os.path import join
+from shutil import move as mv
+import pathlib
+
+def move(indir, outdir, n):# relocate the first n entries of indir (in name order) into outdir
+	target = pathlib.Path(outdir)
+	for name in sorted(ls(indir))[:n]:
+		# outdir is only created once there is something to move into it
+		target.mkdir(parents=True, exist_ok=True)
+		mv(join(indir, name), join(outdir, name))
+
+if __name__ == '__main__':
+	# the calls run in order on the same source folders, so each 'valid' move takes the next 300 entries left after the 1000 moved to 'train'
+	move('./train/infarctus', './sample/train/infarctus', 1000) # move 1000 ordered files from param 1 to param 2
+	move('./train/noinfarctus', './sample/train/noinfarctus', 1000)
+	move('./train/infarctus', './sample/valid/infarctus', 300)
+	move('./train/noinfarctus', './sample/valid/noinfarctus', 300)
\ No newline at end of file
diff --git a/totraindir.py b/totraindir.py
new file mode 100644
index 0000000..4917b56
--- /dev/null
+++ b/totraindir.py
@@ -0,0 +1,77 @@
+from os import listdir as ls
+import png
+import pydicom
+from os.path import join
+
+# locals
+from topng import topng
+from regularjson import search, RT_PATH, JSON_GTS, INFA_STR
+
+# constants
+GLOB_DIR = '../../Data/Images_anonymous/'
+OUT_DIR = './train/'
+INDICE_CASE = 'Case'
+
+START = None # to manage the number of Patient Case to use in training + validation, if START == None => 0, if END == None => last index, it will use all in GLOB_DIR
+END = None
+
+
+def get(l, i, r):# sorted listing of r when l has at most one element, otherwise None (i is unused)
+	if len(l) <= 1:
+		return sorted( ls(r) )
+
+if __name__ == '__main__':
+	l = sorted(ls(GLOB_DIR))
+	for cas in l[START:END]:# cas like 'Case0002'
+		caspath = join(GLOB_DIR, cas)
+
+		if INDICE_CASE not in cas:# if the directory name doesn't sound familiar
+			continue
+
+		l1 = sorted( ls( caspath ) )# just ls one CaseXXXX and sort the result
+		# $$NOTRUSTREF here we need to sort because there is no trusted ref or id
+
+		r = search(RT_PATH, cas)
+
+		print(cas, end=' ', flush=True) # log CaseXXXX
+
+		if not r: # if the feature isn't labeled by an expert yet, go to next
+			continue
+		r = search(r['path'], '.') # to be more dynamic, here can just be '.' instead of '1.2.3.4.5.6'
+
+		if r:
+			r = r['path']
+			try:
+				l2 = sorted( ls(r) ) # $$NOTRUSTREF
+			except NotADirectoryError as e:
+				print("WARN", "a file in unexcepted place")
+				continue
+
+			if 0 < len(l2) < len(l1):
+				# Try once: the labels may sit one directory level deeper
+				r = join(r, l2[0])
+				try:
+					l2 = sorted( ls(r) ) # $$NOTRUSTREF
+				except NotADirectoryError:
+					l2 = [] # not a directory: reported as a mismatch below
+
+			if len(l2) < len(l1):
+				# an empty l2 must be skipped too, otherwise l2[i] below raises IndexError
+				print("WARN", "json doesn't match with images (labels != features), Case", cas)
+				continue
+			for i, dic in enumerate(l1):
+				# l2[i] is safe here: l2 has at least as many entries as l1
+				ref = join(r, l2[i]) # logically, should be the json ref of i dicom image
+
+				infarctus = search(ref, INFA_STR)
+				if infarctus:
+					# an INFA_STR entry exists under ref: the image goes to the
+					# 'infarctus' class folder, named '<case>-<dicom name>'
+					# (topng itself appends the '.png' extension)
+					topng(join(caspath, dic), '{}/{}-{}'.format(join(OUT_DIR, 'infarctus'), cas, dic))
+				else:
+					# no INFA_STR entry under ref: same naming, in the
+					# 'noinfarctus' class folder
+					topng(join(caspath, dic), '{}/{}-{}'.format(join(OUT_DIR, 'noinfarctus'), cas, dic))
+
+	print('Ended!')
\ No newline at end of file
-- 
2.39.5