From c67ab60de95008178e26817946990382751d91d8 Mon Sep 17 00:00:00 2001
From: Bernardo TOD
Date: Thu, 23 May 2019 07:52:45 +0200
Subject: [PATCH 1/1] Initialize

---
 formatdicom.py | 111 +++++++++++++++++++++++++++++++++++++++++++++++
 helpers.py     |   8 ++++
 readme.txt     |  10 +++++
 regularjson.py | 115 +++++++++++++++++++++++++++++++++++++++++++++++++
 topng.py       |  98 +++++++++++++++++++++++++++++++++++++++++
 tosampledir.py |  27 ++++++++++++
 totraindir.py  |  90 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 459 insertions(+)
 create mode 100644 formatdicom.py
 create mode 100644 helpers.py
 create mode 100644 readme.txt
 create mode 100644 regularjson.py
 create mode 100644 topng.py
 create mode 100644 tosampledir.py
 create mode 100644 totraindir.py

diff --git a/formatdicom.py b/formatdicom.py
new file mode 100644
index 0000000..54aa277
--- /dev/null
+++ b/formatdicom.py
@@ -0,0 +1,111 @@
+import os
+import png
+import dicom
+import argparse
+
+
+def mri_to_png(mri_file, png_file):
+    """ Function to convert from a DICOM image to an 8-bit greyscale png
+
+        @param mri_file: An opened file like object to read the dicom data
+        @param png_file: An opened file like object to write the png data
+    """
+
+    # Extracting data from the mri file
+    plan = dicom.read_file(mri_file)
+    # pixel_array is indexed [row][column]: its shape is (height, width)
+    height, width = plan.pixel_array.shape[:2]
+
+    image_2d = [[int(col) for col in row] for row in plan.pixel_array]
+    # Starting from 0, as an image can be empty
+    max_val = max([0] + [max(row) for row in image_2d if row])
+
+    # Rescaling grey scale between 0-255
+    image_2d_scaled = []
+    for row in image_2d:
+        if max_val > 0:
+            row_scaled = [int(float(col) / float(max_val) * 255.0) for col in row]
+        else:
+            # All-black image: nothing to rescale, and no division by zero
+            row_scaled = [0] * len(row)
+        image_2d_scaled.append(row_scaled)
+
+    # Writing the PNG file (png.Writer takes the width first, then the height)
+    w = png.Writer(width, height, greyscale=True)
+    w.write(png_file, image_2d_scaled)
+
+
+def convert_file(mri_file_path, png_file_path):
+    """ Function to convert an MRI binary file to a
+        PNG image file.
+
+        @param mri_file_path: Full path to the mri file
+        @param png_file_path: Full path to the png file
+        @raise Exception: If the mri file is missing or the png file exists
+    """
+
+    # Making sure that the mri file exists
+    if not os.path.exists(mri_file_path):
+        raise Exception('File "%s" does not exists' % mri_file_path)
+
+    # Making sure the png file does not exist
+    if os.path.exists(png_file_path):
+        raise Exception('File "%s" already exists' % png_file_path)
+
+    try:
+        # The context managers close both files, even if the conversion fails
+        with open(mri_file_path, 'rb') as mri_file, \
+                open(png_file_path, 'wb') as png_file:
+            mri_to_png(mri_file, png_file)
+    except Exception:
+        # Do not leave a truncated png behind: it would block the next run
+        if os.path.exists(png_file_path):
+            os.remove(png_file_path)
+        raise
+
+
+def convert_folder(mri_folder, png_folder):
+    """ Convert all MRI files in a folder to png files
+        in a destination folder
+
+        @param mri_folder: Folder containing the mri files, searched recursively
+        @param png_folder: Folder to create for the png files, must not exist yet
+    """
+
+    # Create the folder for the png directory structure
+    os.makedirs(png_folder)
+
+    # Recursively traverse all sub-folders in the path
+    for mri_sub_folder, subdirs, files in os.walk(mri_folder):
+
+        # Replicate the original file structure
+        rel_path = os.path.relpath(mri_sub_folder, mri_folder)
+        png_folder_path = os.path.join(png_folder, rel_path)
+
+        # os.walk already gives the files of the sub-folder, no need to list it
+        for mri_file in files:
+            mri_file_path = os.path.join(mri_sub_folder, mri_file)
+            if not os.path.exists(png_folder_path):
+                os.makedirs(png_folder_path)
+            png_file_path = os.path.join(png_folder_path, '%s.png' % mri_file)
+
+            try:
+                # Convert the actual file
+                convert_file(mri_file_path, png_file_path)
+                print('SUCCESS>', mri_file_path, '-->', png_file_path)
+            except Exception as e:
+                print('FAIL>', mri_file_path, '-->', png_file_path, ':', e)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Convert a dicom MRI file to png")
+    parser.add_argument('-f', action='store_true', help='Convert a whole folder')
+    parser.add_argument('dicom_path', help='Full path to the mri file')
+    parser.add_argument('png_path', help='Full path to the generated png file')
+
+    args = parser.parse_args()
+    print(args)
+    if args.f:
+        convert_folder(args.dicom_path, args.png_path)
+    else:
+        convert_file(args.dicom_path, args.png_path)
diff --git a/helpers.py b/helpers.py
new file mode 100644
index 0000000..9784900
--- /dev/null
+++ b/helpers.py
@@ -0,0 +1,8 @@
+def getdir(filepath):
+    """ Return the directory part of a '/'-separated path, ending with '/'
+
+        A bare file name has no directory part: './' is returned for it rather
+        than '/', which would designate the root of the filesystem.
+    """
+    head, sep, _ = filepath.rpartition('/')
+    return head + '/' if sep else './'
diff --git a/readme.txt b/readme.txt
new file mode 100644
index 0000000..28b12aa
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,10 @@
+# ---------- README
+
+# Edit constants in regularjson.py file, and execute it
+python3 regularjson.py
+
+# Edit constants in totraindir.py file, and execute it
+python3 totraindir.py
+
+# Edit paths in tosampledir.py file, and execute it
+python3 tosampledir.py
diff --git a/regularjson.py b/regularjson.py
new file mode 100644
index 0000000..fa89890
--- /dev/null
+++ b/regularjson.py
@@ -0,0 +1,115 @@
+"""
+by @res
+
+1) Read -R json_GT* and get the "Feature Refs" & labels (0: Infarctus, 1: No infarctus)
+2) store "Feature Refs" & labels
+    Format:
+    {
+        nb_infarct: 2,
+        infarcts: [],
+        no_infarcts: [],
+    }
+"""
+from os import listdir as ls
+from os.path import getsize as size, isdir, isfile, join
+from pprint import pprint
+
+
+# constants
+# json
+#   |--> json_GT
+#   |--> json_GT_part2
+RT_PATH = '../../Data/json/'
+JSON_GTS = ['json_GT', 'json_GT_part2']
+INFA_STR = 'Infa'
+
+# Globals, filled by featurerefs() and labels()
+featurereflist = []  # not named 'featurerefs': the function of that name would replace it
+labellist = []
+details = []
+
+
+def search(path, name, recursive=True, fsize=True):
+    """
+    Look for the first entry under path whose name contains name
+    :param path: directory to search in
+    :param name: substring to look for in the names of the entries
+    :param recursive: Recursive or not
+    :param fsize: report the size of the entry found or not
+    :return: {'path': p, 'size': size or None}, or None when nothing matches
+    """
+    for d in ls(path):
+        p = join(path, d)
+        if name in d:
+            return {'path': p, 'size': size(p) if fsize else None}
+        elif recursive and isdir(p):  # only enter if it's a directory
+            res = search(p, name, recursive, fsize)  # keep the options of the caller
+            if res:  # stop if found
+                return res
+    return None
+
+
+def merge(dirlist, root='', indice='Case'):
+    """
+    List the entries containing indice in every directory of dirlist
+    :param dirlist: names of the directories, relative to root
+    :param root: common parent of the directories
+    :param indice: substring an entry must contain to be kept
+    :return: sorted list of the names of the entries
+    """
+    l = []
+    for d in dirlist:
+        l += [e for e in ls(join(root, d)) if indice in e]
+
+    return sorted(l)
+
+
+def mergejsons():
+    """
+    :return: sorted names of the 'Case' entries of the JSON_GTS directories
+    """
+    return merge(JSON_GTS, RT_PATH)
+
+
+def featurerefs():
+    """
+    Fill featurereflist with the feature references
+    uses JSON_GTS, a little bit static in this sense
+    :return: the list of the feature references
+    """
+    featurereflist[:] = mergejsons()  # already sorted by merge()
+    return featurereflist
+
+
+def label(fref):
+    """
+    :param fref: feature reference to label, like 'Case_02'
+    :return: (0, size of the entry matching INFA_STR), or (1, None) without match
+    :raise FileNotFoundError: when fref is not found under RT_PATH
+    """
+    res = search(RT_PATH, fref)
+    if not res:
+        raise FileNotFoundError('no "%s" under "%s"' % (fref, RT_PATH))
+    b = search(res['path'], INFA_STR)
+    return (0, b['size']) if b else (1, None)
+
+
+def labels():
+    """
+    Fill labellist and details with the label of every feature reference
+    """
+    # Start from scratch: calling labels() twice must not duplicate the results
+    del labellist[:], details[:]
+    for name in featurereflist:
+        lab, detail = label(name)
+        labellist.append(lab)
+        details.append(detail)
+
+
+if __name__ == '__main__':
+    featurerefs()
+    labels()
+
+    print('featurerefs:', len(featurereflist), featurereflist)
+    print('labels:', len(labellist), labellist)
+    print('details:', len(details), details)
diff --git a/topng.py b/topng.py
new file mode 100644
index 0000000..9651446
--- /dev/null
+++ b/topng.py
@@ -0,0 +1,98 @@
+import cv2
+import os
+from os.path import isdir, join
+from os import mkdir
+import pydicom
+from pprint import pprint
+import numpy as np
+import pathlib
+
+from decimal import Decimal as d, ROUND_HALF_UP as rhu
+
+# locals
+from helpers import getdir
+
+PNG16_MAX = pow(2, 16) - 1  # largest value of a 16 bits sample
+PNG8_MAX = pow(2, 8) - 1  # largest value of a 8 bits sample (bits 0 to 7)
+
+INPUT_DIR = '../../Data/Images_anonymous/Case_0449/'
+OUT_DIR = './generated/'
+
+
+def map16(array):  # can be useful in future
+    return array * 16
+
+
+def affine(Mat, ab, cd):
+    """
+    Affine transformation, applied in place to every element of Mat
+    Mat: numpy array, modified in place
+    ab: the 'from' interval as (begin, end), like (2, 384)
+    cd: the 'to' interval as (begin, end), like (0, 1024)
+    """
+    a, b = float(ab[0]), float(ab[1])
+    c, d = float(cd[0]), float(cd[1])
+
+    if b == a:
+        # Empty 'from' interval (constant image): everything goes to the begin
+        # of the 'to' interval, instead of dividing by zero
+        Mat[...] = max(0, c)
+        return
+
+    # Computed on the whole array at once and in float64: doing the arithmetic
+    # in the integer dtype of Mat could overflow on (x-a) * (d-c)
+    scaled = (np.asarray(Mat, dtype=np.float64) - a) * (d - c) / (b - a) + c
+    Mat[...] = np.maximum(0, np.rint(scaled))  # could not be negative
+
+
+def topng(inputfile, outfile=None, overwrite=True):
+    """
+    Convert a dicom image to a png image, stretched to the full grey range
+    inputfile: path of the dicom file
+    outfile: path of the png file without its '.png' extension (default: inputfile)
+    overwrite: when False, an already existing png file is left untouched
+    return: path of the png file, or None when nothing has been written
+    """
+    try:
+        dicimg = pydicom.dcmread(inputfile)  # read dicom image
+    except pydicom.errors.InvalidDicomError as e:
+        print('WARN', 'not a dicom file, skipped:', inputfile, e)
+        return None
+
+    savepath = (outfile or inputfile) + '.png'
+    if not overwrite and os.path.exists(savepath):
+        return None
+
+    # Work on a float copy: the stretch below must neither overflow the integer
+    # dtype of the dicom pixels, nor modify the pixel data held by dicimg
+    img = dicimg.pixel_array.astype(np.float64)
+
+    # affine transfo to png 8 or 16 bits, func affects img variable
+    maxdepth = img.max()
+    pngmax = PNG16_MAX if maxdepth > PNG8_MAX else PNG8_MAX
+    affine(img, (img.min(), maxdepth), (0, pngmax))
+    # the dtype of the array gives the bit depth of the png written by cv2
+    img = img.astype(np.uint16 if pngmax == PNG16_MAX else np.uint8)
+
+    pathlib.Path(getdir(savepath)).mkdir(parents=True, exist_ok=True)
+
+    if not cv2.imwrite(savepath, img, [cv2.IMWRITE_PNG_COMPRESSION, 0]):  # write png image
+        print('WARN', 'could not write', savepath)
+        return None
+    return savepath
+
+
+def topngs(inputdir, outdir):
+    """
+    Convert every dicom file of a directory
+    inputdir: directory which contains directly dicom files
+    outdir: directory of the png files
+    """
+    for f in sorted(os.listdir(inputdir)):
+        if os.path.isfile(join(inputdir, f)):  # a sub-directory is not a dicom file
+            topng(join(inputdir, f), join(outdir, f))
+
+
+if __name__ == '__main__':
+    # topngs( INPUT_DIR, join(OUT_DIR, INPUT_DIR.split('/')[-2]) )
+    topng(INPUT_DIR+'Image00001', OUT_DIR + INPUT_DIR.split('/')[-2] +'-Image00001')
diff --git a/tosampledir.py b/tosampledir.py
new file mode 100644
index 0000000..87effac
--- /dev/null
+++ b/tosampledir.py
@@ -0,0 +1,27 @@
+from os import listdir as ls
+from os.path import join
+from shutil import move as mv
+import pathlib
+
+
+def move(indir, outdir, n):
+    """
+    Move the n first entries of indir, in name order, to outdir
+    return: number of entries moved (less than n when indir has fewer entries)
+    """
+    l = sorted(ls(indir))[:n]
+    if l:
+        # once is enough, no need to create the directory for every file
+        pathlib.Path(outdir).mkdir(parents=True, exist_ok=True)
+    for filename in l:
+        mv(join(indir, filename), join(outdir, filename))
+    return len(l)
+
+
+if __name__ == '__main__':
+    # move 1000 ordered files from param 1 to param 2
+    move('./train/infarctus', './sample/train/infarctus', 1000)
+    move('./train/noinfarctus', './sample/train/noinfarctus', 1000)
+    # the files moved above are gone from ./train: these are the next 300 ones
+    move('./train/infarctus', './sample/valid/infarctus', 300)
+    move('./train/noinfarctus', './sample/valid/noinfarctus', 300)
diff --git a/totraindir.py b/totraindir.py
new file mode 100644
index 0000000..4917b56
--- /dev/null
+++ b/totraindir.py
@@ -0,0 +1,90 @@
+from os import listdir as ls
+import png
+import pydicom
+from os.path import isdir, join
+
+# locals
+from topng import topng
+from regularjson import search, RT_PATH, JSON_GTS, INFA_STR
+
+# constants
+GLOB_DIR = '../../Data/Images_anonymous/'
+OUT_DIR = './train/'
+INDICE_CASE = 'Case'
+
+# to manage the number of Patient Case to use in training + validation:
+# START == None => 0, END == None => last index, with both all GLOB_DIR is used
+START = None
+END = None
+
+
+def get(l, i, r):
+    # NOTE: not called anywhere in this file; i is not used, and None is
+    # returned as soon as l has more than one element
+    if len(l) <= 1:
+        return sorted( ls(r) )
+
+
+def labelrefs(cas, nbimages):
+    """
+    Find the directory holding one json reference per dicom image of a case
+    cas: name of the case, like 'Case0002'
+    nbimages: number of dicom images of the case
+    return: (directory, sorted entries of the directory), or None when the
+            case can not be used
+    """
+    r = search(RT_PATH, cas)
+    if not r:  # if the feature isn't yet labeled by an expert go to next
+        return None
+    if isdir(r['path']):
+        r = search(r['path'], '.')  # to be more dynamic, just '.' instead of '1.2.3.4.5.6'
+    if not r or not isdir(r['path']):
+        print("WARN", "no reference directory (or a file in unexpected place), Case", cas)
+        return None
+    r = r['path']
+    l2 = sorted( ls(r) )  # $$NOTRUSTREF
+
+    # Try once: check the sub-directory if this one is not the right one
+    if 0 < len(l2) < nbimages and isdir(join(r, l2[0])):
+        r = join(r, l2[0])
+        l2 = sorted( ls(r) )  # $$NOTRUSTREF
+
+    if len(l2) < nbimages:
+        # an empty directory is a mismatch as well: l2[i] would not exist
+        print("WARN", "json doesn't match with images (labels != features), Case", cas)
+        return None
+    return r, l2
+
+
+def main():
+    l = sorted(ls(GLOB_DIR))
+    for cas in l[START:END]:  # cas like 'Case0002'
+        caspath = join(GLOB_DIR, cas)
+
+        if INDICE_CASE not in cas:  # if the directory name doesn't sound familiar
+            continue
+
+        # $$NOTRUSTREF here we need to sort because there is no trusted ref or id
+        l1 = sorted( ls( caspath ) )  # just ls one CaseXXXX and sort the result
+
+        print(cas, end=' ', flush=True)  # log CaseXXXX
+
+        refs = labelrefs(cas, len(l1))
+        if not refs:  # r and l2 must never leak from one case to the next one
+            continue
+        r, l2 = refs
+
+        for i, dic in enumerate(l1):
+            ref = join(r, l2[i])  # logically, should be the json ref of i dicom image
+            if not isdir(ref):
+                print("WARN", "a file in unexpected place:", ref)
+                continue
+
+            label = 'infarctus' if search(ref, INFA_STR) else 'noinfarctus'
+            topng(join(caspath, dic), '{}/{}-{}'.format(join(OUT_DIR, label), cas, dic))
+
+    print('Ended!')
+
+
+if __name__ == '__main__':
+    main()
-- 
2.39.5