diff --git a/docs/src/changes.rst b/docs/src/changes.rst
index d9bb793e..5335df39 100644
--- a/docs/src/changes.rst
+++ b/docs/src/changes.rst
@@ -4,6 +4,27 @@
 Change Log
 ===========================================================================
 
+Changes in version 0.0.15
+--------------------------
+
+Fixes:
+~~~~~~~
+
+* `138 <https://github.com/pymupdf/RAG/issues/138>`_ "Table is not extracted and some text order was wrong."
+* `135 <https://github.com/pymupdf/RAG/issues/135>`_ "Problem with multiple columns in simple text."
+* `134 <https://github.com/pymupdf/RAG/issues/134>`_ "Exclude images based on size threshold parameter."
+* `132 <https://github.com/pymupdf/RAG/issues/132>`_ "Optionally embed images as base64 string."
+* `128 <https://github.com/pymupdf/RAG/issues/128>`_ "Enhanced image embedding format."
+
+
+Improvements:
+~~~~~~~~~~~~~~
+* New parameter `embed_images` (bool) **embeds** images and vector graphics in the markdown text as base64-encoded strings. Ignores `write_images` and `image_path` parameters.
+* New parameter `image_size_limit` which is a float between 0 and 1, default is 0.05 (5%). Causes images to be ignored if their width or height values are smaller than the corresponding fraction of the page's width or height.
+* The algorithm which determines the sequence of the text rectangles on multi-column pages has been improved.
+* Change of the header identification algorithm: If more than six header levels are required for a document, then all text with a font size larger than body text is assumed to be a header of level 6 (i.e. HTML "h6" = "###### ").
+
+
 Changes in version 0.0.13
 --------------------------
 
@@ -19,7 +40,6 @@ Improvements:
 * New parameter `extract_words` enforces `page_chunks=True` and adds a "words" list to each page dictionary.
 
 
-
 Changes in version 0.0.11
 --------------------------
 
diff --git a/pymupdf4llm/pymupdf4llm/__init__.py b/pymupdf4llm/pymupdf4llm/__init__.py
index 75f6156c..512fbeb8 100644
--- a/pymupdf4llm/pymupdf4llm/__init__.py
+++ b/pymupdf4llm/pymupdf4llm/__init__.py
@@ -1,6 +1,6 @@
 from .helpers.pymupdf_rag import IdentifyHeaders, to_markdown
 
-__version__ = "0.0.14"
+__version__ = "0.0.15"
 version = __version__
 version_tuple = tuple(map(int, version.split(".")))
 
diff --git a/pymupdf4llm/pymupdf4llm/helpers/get_text_lines.py b/pymupdf4llm/pymupdf4llm/helpers/get_text_lines.py
index fb134b15..8af6a1e8 100644
--- a/pymupdf4llm/pymupdf4llm/helpers/get_text_lines.py
+++ b/pymupdf4llm/pymupdf4llm/helpers/get_text_lines.py
@@ -34,7 +34,8 @@ def get_raw_lines(textpage, clip=None, tolerance=3):
     Result is a sorted list of line objects that consist of the recomputed line
     boundary box and the sorted list of spans in that line.
 
-    This result can then easily be converted e.g. to plain or markdown text.
+    This result can then easily be converted e.g. to plain text and other
+    formats like Markdown or JSON.
 
     Args:
         textpage: (mandatory) TextPage object
@@ -45,7 +46,7 @@ def get_raw_lines(textpage, clip=None, tolerance=3):
 
     Returns:
         A sorted list of items (rect, [spans]), each representing one line. The
-        spans are sorted left to right, Span dictionaries have been changed:
+        spans are sorted left to right. Span dictionaries have been changed:
         - "bbox" has been converted to a Rect object
         - "line" (new) the line number in TextPage.extractDICT
         - "block" (new) the block number in TextPage.extractDICT
@@ -98,7 +99,7 @@ def sanitize_spans(line):
     spans = []  # all spans in TextPage here
     for bno, b in enumerate(blocks):  # the numbered blocks
         for lno, line in enumerate(b["lines"]):  # the numbered lines
-            if abs(1-line["dir"][0]) > 1e-3:  # only accept horizontal text
+            if abs(1 - line["dir"][0]) > 1e-3:  # only accept horizontal text
                 continue
             for sno, s in enumerate(line["spans"]):  # the numered spans
                 sbbox = pymupdf.Rect(s["bbox"])  # span bbox as a Rect
@@ -131,7 +132,10 @@ def sanitize_spans(line):
         sbbox = s["bbox"]  # this bbox
         sbbox0 = line[-1]["bbox"]  # previous bbox
         # if any of top or bottom coordinates are close enough, join...
-        if abs(sbbox.y1 - sbbox0.y1) <= y_delta or abs(sbbox.y0 - sbbox0.y0) <= y_delta:
+        if (
+            abs(sbbox.y1 - sbbox0.y1) <= y_delta
+            or abs(sbbox.y0 - sbbox0.y0) <= y_delta
+        ):
             line.append(s)  # append to this line
             lrect |= sbbox  # extend line rectangle
             continue
@@ -152,7 +156,9 @@ def sanitize_spans(line):
     return nlines
 
 
-def get_text_lines(page, *, textpage=None, clip=None, sep="\t", tolerance=3, ocr=False):
+def get_text_lines(
+    page, *, textpage=None, clip=None, sep="\t", tolerance=3, ocr=False
+):
     """Extract text by line keeping natural reading sequence.
 
     Notes:
diff --git a/pymupdf4llm/pymupdf4llm/helpers/multi_column.py b/pymupdf4llm/pymupdf4llm/helpers/multi_column.py
index 8580b892..3c96bcb0 100644
--- a/pymupdf4llm/pymupdf4llm/helpers/multi_column.py
+++ b/pymupdf4llm/pymupdf4llm/helpers/multi_column.py
@@ -64,6 +64,8 @@
 
 import pymupdf
 
+pymupdf.TOOLS.set_small_glyph_heights(True)
+
 
 def column_boxes(
     page,
@@ -91,36 +93,11 @@ def is_white(text):
         """Check for relevant text."""
         return WHITE.issuperset(text)
 
-    # compute relevant page area
-    clip = +page.rect
-    clip.y1 -= footer_margin  # Remove footer area
-    clip.y0 += header_margin  # Remove header area
-
-    if paths is None:
-        paths = page.get_drawings()
-
-    if textpage is None:
-        textpage = page.get_textpage(clip=clip, flags=pymupdf.TEXTFLAGS_TEXT)
-
-    bboxes = []
-
-    # path rectangles
-    path_rects = []
-
-    # image bboxes
-    img_bboxes = []
-    if avoid is not None:
-        img_bboxes.extend(avoid)
-
-    # bboxes of non-horizontal text
-    # avoid when expanding horizontal text boxes
-    vert_bboxes = []
-
     def in_bbox(bb, bboxes):
         """Return 1-based number if a bbox contains bb, else return 0."""
-        for i, bbox in enumerate(bboxes):
+        for i, bbox in enumerate(bboxes, start=1):
             if bb in bbox:
-                return i + 1
+                return i
         return 0
 
     def intersects_bboxes(bb, bboxes):
@@ -132,7 +109,8 @@ def intersects_bboxes(bb, bboxes):
 
     def can_extend(temp, bb, bboxlist, vert_bboxes):
         """Determines whether rectangle 'temp' can be extended by 'bb'
-        without intersecting any of the rectangles contained in 'bboxlist'.
+        without intersecting any of the rectangles contained in 'bboxlist'
+        or 'vert_bboxes'.
 
         Items of bboxlist may be None if they have been removed.
 
@@ -148,6 +126,51 @@ def can_extend(temp, bb, bboxlist, vert_bboxes):
 
         return True
 
+    def clean_nblocks(nblocks):
+        """Do some elementary cleaning of the block list (in place).
+
+        1. Remove blocks that are equal to their direct predecessor.
+        2. Sort runs of consecutive blocks whose bottom coordinates differ
+           by at most 3 from the first one of the run by ascending x0.
+
+        Returns the (modified) list 'nblocks'.
+        """
+
+        # 1. remove any duplicate blocks.
+        blen = len(nblocks)
+        if blen < 2:
+            return nblocks
+        start = blen - 1
+        # Stop at index 1: for i == 0, nblocks[i - 1] would wrap around to the
+        # last item and could even delete the only remaining block.
+        for i in range(start, 0, -1):
+            bb1 = nblocks[i]
+            bb0 = nblocks[i - 1]
+            if bb0 == bb1:
+                del nblocks[i]
+
+        # 2. repair sequence in special cases:
+        # consecutive bboxes with almost same bottom value are sorted ascending
+        # by x-coordinate.
+        y1 = nblocks[0].y1  # first bottom coordinate
+        i0 = 0  # its index
+        i1 = -1  # index of last bbox with same bottom
+
+        # Iterate over bboxes, identifying segments with approx. same bottom value.
+        # Replace every segment by its sorted version.
+        for i in range(1, len(nblocks)):
+            b1 = nblocks[i]
+            if abs(b1.y1 - y1) > 3:  # different bottom
+                if i1 > i0:  # segment length > 1? Sort it!
+                    nblocks[i0 : i1 + 1] = sorted(
+                        nblocks[i0 : i1 + 1], key=lambda b: b.x0
+                    )
+                y1 = b1.y1  # store new bottom value
+                i0 = i  # store its start index
+            i1 = i  # store current index
+        if i1 > i0:  # segment waiting to be sorted
+            nblocks[i0 : i1 + 1] = sorted(nblocks[i0 : i1 + 1], key=lambda b: b.x0)
+        return nblocks
 
     def join_rects_phase1(bboxes):
         """Postprocess identified text blocks, phase 1.
@@ -156,7 +170,7 @@ def join_rects_phase1(bboxes):
         This means that their intersection is valid (but may be empty).
         To prefer vertical joins, we will ignore small horizontal gaps.
         """
-        delta=(0,-3,0,3)  # allow thid gap above and below
+        delta = (0, 0, 0, 2)  # allow this gap below
         prects = bboxes[:]
         new_rects = []
         while prects:
@@ -165,7 +179,7 @@ def join_rects_phase1(bboxes):
             while repeat:
                 repeat = False
                 for i in range(len(prects) - 1, 0, -1):
-                    if ((prect0+delta) & (prects[i]+delta)).is_valid:
+                    if not ((prect0 + delta) & prects[i]).is_empty:
                         prect0 |= prects[i]
                         del prects[i]
                         repeat = True
@@ -211,10 +225,10 @@ def join_rects_phase2(bboxes):
             new_rects.append(r)
         return new_rects
 
-    def join_rects_phase3(bboxes):
+    def join_rects_phase3(bboxes, path_rects):
         prects = bboxes[:]
-        prects.sort(key=lambda b: (b.x0, b.y0))
         new_rects = []
+
         while prects:
             prect0 = prects[0]
             repeat = True
@@ -222,15 +236,15 @@ def join_rects_phase3(bboxes):
                 repeat = False
                 for i in range(len(prects) - 1, 0, -1):
                     prect1 = prects[i]
+                    # do not join across columns
                     if prect1.x0 > prect0.x1 or prect1.x1 < prect0.x0:
                         continue
-                    temp = prect0 | prects[i]
+                    # do not join different backgrounds
+                    if in_bbox(prect0, path_rects) != in_bbox(prect1, path_rects):
+                        continue
+                    temp = prect0 | prect1
                     test = set(
-                        [
-                            tuple(b)
-                            for b in prects + new_rects
-                            if b.intersects(temp)
-                        ]
+                        [tuple(b) for b in prects + new_rects if b.intersects(temp)]
                     )
                     if test == set((tuple(prect0), tuple(prect1))):
                         prect0 |= prect1
@@ -238,55 +252,101 @@ def join_rects_phase3(bboxes):
                         repeat = True
             new_rects.append(prect0)
             del prects[0]
-        new_rects.sort(key=lambda b: (b.y0, b.x0))
-        return new_rects
 
-    def clean_nblocks(nblocks):
-        """Do some elementary cleaning."""
+        """
+        Hopefully the most reasonable sorting sequence:
+        At this point we have finished identifying blocks that wrap text.
+        We now need to determine the SEQUENCE by which text extraction from
+        these blocks should take place. This is hardly possible with 100%
+        certainty. Our sorting approach is guided by the following thought:
+        1. Extraction should start with the block whose top-left corner is the
+           left-most and top-most.
+        2. Any blocks further to the right should be extracted later - even if
+           their top-left corner is higher up on the page.
+        3. Sorting the identified rectangles must therefore happen using a
+           tuple (y, x) as key, where y is not smaller (= higher up) than that
+           of the left-most block with a non-empty vertical overlap.
+        4. To continue "left block" with "next is ...", its sort key must be
+                          Q +---------+    tuple (P.y, Q.x).
+                            | next is |
+              P +-------+   |  this   |
+                | left  |   |  block  |
+                | block |   +---------+
+                +-------+
+        """
+        sort_rects = []  # copy of "new_rects" with a computed sort key
+        for box in new_rects:
+            # search for the left-most rect that overlaps like "P" above
+            # candidates must have the same background
+            background = in_bbox(box, path_rects)  # this background
+            left_rects = sorted(
+                [
+                    r
+                    for r in new_rects
+                    if r.x1 < box.x0
+                    and (box.y0 <= r.y0 <= box.y1 or box.y0 <= r.y1 <= box.y1)
+                    # and in_bbox(r, path_rects) == background
+                ],
+                key=lambda r: r.x1,
+            )
+            if left_rects:  # if a "P" rectangle was found ...
+                key = (left_rects[-1].y0, box.x0)  # use this key
+            else:
+                key = (box.y0, box.x0)  # else use the original (Q.y, Q.x).
+            sort_rects.append((box, key))
+        sort_rects.sort(key=lambda sr: sr[1])  # by computed key
+        new_rects = [sr[0] for sr in sort_rects]  # extract sorted rectangles
+
+        # move shaded text rects into a separate list
+        shadow_rects = []
+        # for i in range(len(new_rects) - 1, 0, -1):
+        #     r = +new_rects[i]
+        #     if in_bbox(r, path_rects):  # text with shaded background
+        #         shadow_rects.insert(0, r)  # put in front to keep sequence
+        #         del new_rects[i]
+        return new_rects + shadow_rects
 
-        # 1. remove any duplicate blocks.
-        blen = len(nblocks)
-        if blen < 2:
-            return nblocks
-        start = blen - 1
-        for i in range(start, -1, -1):
-            bb1 = nblocks[i]
-            bb0 = nblocks[i - 1]
-            if bb0 == bb1:
-                del nblocks[i]
+    # compute relevant page area
+    clip = +page.rect
+    clip.y1 -= footer_margin  # Remove footer area
+    clip.y0 += header_margin  # Remove header area
 
-        # 2. repair sequence in special cases:
-        # consecutive bboxes with almost same bottom value are sorted ascending
-        # by x-coordinate.
-        y1 = nblocks[0].y1  # first bottom coordinate
-        i0 = 0  # its index
-        i1 = -1  # index of last bbox with same bottom
+    paths = [
+        p
+        for p in page.get_drawings()
+        if p["rect"].width < clip.width and p["rect"].height < clip.height
+    ]
 
-        # Iterate over bboxes, identifying segments with approx. same bottom value.
-        # Replace every segment by its sorted version.
-        for i in range(1, len(nblocks)):
-            b1 = nblocks[i]
-            if abs(b1.y1 - y1) > 10:  # different bottom
-                if i1 > i0:  # segment length > 1? Sort it!
-                    nblocks[i0 : i1 + 1] = sorted(
-                        nblocks[i0 : i1 + 1], key=lambda b: b.x0
-                    )
-                y1 = b1.y1  # store new bottom value
-                i0 = i  # store its start index
-            i1 = i  # store current index
-        if i1 > i0:  # segment waiting to be sorted
-            nblocks[i0 : i1 + 1] = sorted(
-                nblocks[i0 : i1 + 1], key=lambda b: b.x0
-            )
-        return nblocks
+    if textpage is None:
+        textpage = page.get_textpage(clip=clip, flags=pymupdf.TEXTFLAGS_TEXT)
+
+    bboxes = []
+
+    # image bboxes
+    img_bboxes = []
+    if avoid is not None:
+        img_bboxes.extend(avoid)
+
+    # non-horizontal text boxes, avoid when expanding other text boxes
+    vert_bboxes = []
 
-    # extract vector graphics
+    # path rectangles
+    path_rects = []
     for p in paths:
-        path_rects.append(p["rect"].irect)
-    path_bboxes = path_rects
+        # give empty path rectangles some small width or height
+        prect = p["rect"]
+        lwidth = 0.5 if (_ := p["width"]) is None else _ * 0.5
+
+        if prect.width == 0:
+            prect.x0 -= lwidth
+            prect.x1 += lwidth
+        if prect.height == 0:
+            prect.y0 -= lwidth
+            prect.y1 += lwidth
+        path_rects.append(prect)
 
     # sort path bboxes by ascending top, then left coordinates
-    path_bboxes.sort(key=lambda b: (b.y0, b.x0))
+    path_rects.sort(key=lambda b: (b.y0, b.x0))
 
     # bboxes of images on page, no need to sort them
     for item in page.get_images():
@@ -297,7 +357,7 @@ def clean_nblocks(nblocks):
 
     # Make block rectangles, ignoring non-horizontal text
     for b in blocks:
-        bbox = pymupdf.IRect(b["bbox"])  # bbox of the block
+        bbox = pymupdf.Rect(b["bbox"])  # bbox of the block
 
         # ignore text written upon images
         if no_image_text and in_bbox(bbox, img_bboxes):
@@ -309,15 +369,15 @@ def clean_nblocks(nblocks):
         except IndexError:
             continue
 
-        if line0["dir"] != (1, 0):  # only accept horizontal text
-            vert_bboxes.append(bbox)
+        if abs(1 - line0["dir"][0]) > 1e-3:  # only (almost) horizontal text
+            vert_bboxes.append(bbox)  # a block with non-horizontal text
             continue
 
-        srect = pymupdf.EMPTY_IRECT()
+        srect = pymupdf.EMPTY_RECT()
         for line in b["lines"]:
-            lbbox = pymupdf.IRect(line["bbox"])
-            text = "".join([s["text"].strip() for s in line["spans"]])
-            if len(text) > 1:
+            lbbox = pymupdf.Rect(line["bbox"])
+            text = "".join([s["text"] for s in line["spans"]])
+            if not is_white(text):
                 srect |= lbbox
         bbox = +srect
 
@@ -325,12 +385,7 @@ def clean_nblocks(nblocks):
             bboxes.append(bbox)
 
     # Sort text bboxes by ascending background, top, then left coordinates
-    bboxes.sort(key=lambda k: (in_bbox(k, path_bboxes), k.y0, k.x0))
-
-    # Extend bboxes to the right where possible
-    # bboxes = extend_right(
-    #     bboxes, int(page.rect.width), path_bboxes, vert_bboxes, img_bboxes
-    # )
+    bboxes.sort(key=lambda k: (in_bbox(k, path_rects), k.y0, k.x0))
 
     # immediately return of no text found
     if bboxes == []:
@@ -351,16 +406,16 @@ def clean_nblocks(nblocks):
             nbb = nblocks[j]  # a new block
 
             # never join across columns
-            if bb == None or nbb.x1 < bb.x0 or bb.x1 < nbb.x0:
+            if bb is None or nbb.x1 < bb.x0 or bb.x1 < nbb.x0:
                 continue
 
             # never join across different background colors
-            if in_bbox(nbb, path_bboxes) != in_bbox(bb, path_bboxes):
+            if in_bbox(nbb, path_rects) != in_bbox(bb, path_rects):
                 continue
 
             temp = bb | nbb  # temporary extension of new block
             check = can_extend(temp, nbb, nblocks, vert_bboxes)
-            if check == True:
+            if check is True:
                 break
 
         if not check:  # bb cannot be used to extend any of the new bboxes
@@ -370,7 +425,7 @@ def clean_nblocks(nblocks):
 
         # check if some remaining bbox is contained in temp
         check = can_extend(temp, bb, bboxes, vert_bboxes)
-        if check == False:
+        if check is False:
             nblocks.append(bb)
         else:
             nblocks[j] = temp
@@ -378,10 +433,11 @@ def clean_nblocks(nblocks):
 
     # do some elementary cleaning
     nblocks = clean_nblocks(nblocks)
-    # final joining of overlapping rectangles
+
+    # several phases of rectangle joining
     nblocks = join_rects_phase1(nblocks)
     nblocks = join_rects_phase2(nblocks)
-    nblocks = join_rects_phase3(nblocks)
+    nblocks = join_rects_phase3(nblocks, path_rects)
 
     # return identified text bboxes
     return nblocks
diff --git a/pymupdf4llm/pymupdf4llm/helpers/progress.py b/pymupdf4llm/pymupdf4llm/helpers/progress.py
index db671de6..e71e601a 100644
--- a/pymupdf4llm/pymupdf4llm/helpers/progress.py
+++ b/pymupdf4llm/pymupdf4llm/helpers/progress.py
@@ -13,7 +13,7 @@
 """
 
 import sys
-from typing import List, Any
+from typing import Any, List
 
 
 class _ProgressBar:
@@ -29,9 +29,13 @@ def __init__(self, items: List[Any], progress_width: int = 40):
         self._increment = self._progress_width / self._len if self._len else 1
 
         # Init progress bar
-        sys.stdout.write("[%s] (0/%d)" % (" " * self._progress_width, self._len))
+        sys.stdout.write(
+            "[%s] (0/%d)" % (" " * self._progress_width, self._len)
+        )
         sys.stdout.flush()
-        sys.stdout.write("\b" * (self._progress_width + len(str(self._len)) + 6))
+        sys.stdout.write(
+            "\b" * (self._progress_width + len(str(self._len)) + 6)
+        )
 
     def __iter__(self):
         return self
@@ -57,7 +61,9 @@ def __next__(self):
         # Update the numerical progress
         padded_index = str(self._current_index).rjust(self._len_digits)
         progress_info = f" ({padded_index}/{self._len})"
-        sys.stdout.write("\b" * (self._progress_width + len(progress_info) + 1))
+        sys.stdout.write(
+            "\b" * (self._progress_width + len(progress_info) + 1)
+        )
         sys.stdout.write("[")
         sys.stdout.write(
             "=" * int(self._current_index * self._progress_width / self._len)
diff --git a/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py b/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py
index 03e12fc6..a0778b4d 100644
--- a/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py
+++ b/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py
@@ -28,9 +28,8 @@
 
 import os
 import string
-
+from binascii import b2a_base64
 import pymupdf
-
 from pymupdf4llm.helpers.get_text_lines import get_raw_lines, is_white
 from pymupdf4llm.helpers.multi_column import column_boxes
 from pymupdf4llm.helpers.progress import ProgressBar
@@ -106,13 +105,13 @@ def __init__(
             reverse=True,
         )
         if temp:
-            b_limit = max(body_limit, temp[0][0])
+            self.body_limit = min(body_limit, temp[0][0])
         else:
-            b_limit = body_limit
+            self.body_limit = body_limit
 
         # identify up to 6 font sizes as header candidates
         sizes = sorted(
-            [f for f in fontsizes.keys() if f > b_limit],
+            [f for f in fontsizes.keys() if f > self.body_limit],
             reverse=True,
         )[:6]
 
@@ -128,6 +127,8 @@ def get_header_id(self, span: dict, page=None) -> str:
         """
         fontsize = round(span["size"])  # compute fontsize
         hdr_id = self.header_id.get(fontsize, "")
+        if not hdr_id and fontsize > self.body_limit:
+            hdr_id = "###### "
         return hdr_id
 
 
@@ -136,15 +137,17 @@ def poly_area(points):
 
     We are using the "shoelace" algorithm (Gauss) for this.
     """
+    # make a local copy of points (avoid changing the original)
+    pts = points[:]
     # remove duplicated connector points first
-    for i in range(len(points) - 1, 0, -1):
-        if points[i] == points[i - 1]:
-            del points[i]
+    for i in range(len(pts) - 1, 0, -1):
+        if pts[i] == pts[i - 1]:
+            del pts[i]
 
     area = 0
-    for i in range(len(points) - 1):
-        p0 = pymupdf.Point(points[i])
-        p1 = pymupdf.Point(points[i + 1])
+    for i in range(len(pts) - 1):
+        p0 = pymupdf.Point(pts[i])
+        p1 = pymupdf.Point(pts[i + 1])
         area += p0.x * p1.y - p1.x * p0.y
     return abs(area) / 2
 
@@ -177,7 +180,7 @@ def is_significant(box, paths):
     """Check whether the rectangle "box" contains 'signifiant' drawings.
 
     For this to be true, at least one path must cover an area,
-    which is less than 90% of box. Otherwise we assume
+    which is smaller than 90% of box. Otherwise we assume
     that the graphic is decoration (highlighting, border-only etc.).
     """
     box_area = abs(box) * 0.9  # 90% of area of box
@@ -216,8 +219,10 @@ def to_markdown(
     pages: list = None,
     hdr_info=None,
     write_images=False,
+    embed_images=False,
     image_path="",
     image_format="png",
+    image_size_limit=0.05,
     force_text=True,
     page_chunks=False,
     margins=(0, 50, 0, 50),
@@ -226,6 +231,7 @@ def to_markdown(
     page_height=None,
     table_strategy="lines_strict",
     graphics_limit=None,
+    fontsize_limit=3,
     ignore_code=False,
     extract_words=False,
     show_progress=True,
@@ -237,6 +243,7 @@ def to_markdown(
         pages: list of page numbers to consider (0-based).
         hdr_info: callable or object having a method named 'get_hdr_info'.
         write_images: (bool) whether to save images / drawing as files.
+        embed_images: (bool) embed images as base64 encoded strings
         image_path: (str) folder into which images should be stored.
         image_format: (str) desired image format. Choose a supported one.
         force_text: (bool) output text despite of background.
@@ -252,19 +259,27 @@ def to_markdown(
         show_progress: (bool) print progress as each page is processed.
 
     """
-    if write_images is False and force_text is False:
-        raise ValueError("Image and text output cannot both be suppressed.")
+    if write_images is False and embed_images is False and force_text is False:
+        raise ValueError("Image and text on images cannot both be suppressed.")
+    if embed_images is True:
+        write_images = False
+        image_path = ""
+    if not 0 < image_size_limit < 1:
+        raise ValueError("'image_size_limit' must be positive and less than 1.")
     DPI = dpi
     IGNORE_CODE = ignore_code
     IMG_EXTENSION = image_format
     EXTRACT_WORDS = extract_words
     if EXTRACT_WORDS is True:
         page_chunks = True
+        ignore_code = IGNORE_CODE = True  # IGNORE_CODE was copied above: set both
     IMG_PATH = image_path
     if IMG_PATH and write_images is True and not os.path.exists(IMG_PATH):
         os.mkdir(IMG_PATH)
 
     GRAPHICS_LIMIT = graphics_limit
+    FONTSIZE_LIMIT = fontsize_limit
+
     if not isinstance(doc, pymupdf.Document):
         doc = pymupdf.open(doc)
 
@@ -327,19 +342,37 @@ def resolve_links(links, span):
     def save_image(page, rect, i):
         """Optionally render the rect part of a page.
 
-        We will always ignore images with an edge smaller than 5%
-        of the corresponding page edge."""
-        if rect.width < page.rect.width * 0.05 or rect.height < page.rect.height * 0.05:
+        We will ignore images that are empty or that have an edge smaller
+        than 'image_size_limit' times the corresponding page edge.
+
+        Returns the image file name if 'write_images' is true, a base64
+        "data:" URL if 'embed_images' is true, otherwise an empty string.
+        """
+
+        if (
+            rect.width < page.rect.width * image_size_limit
+            or rect.height < page.rect.height * image_size_limit
+        ):
             return ""
-        filename = os.path.basename(page.parent.name)
-        image_filename = os.path.join(
-            image_path, f"{filename}-{page.number}-{i}.{IMG_EXTENSION}"
-        )
-        if write_images is True:
+        if write_images is True or embed_images is True:
             pix = page.get_pixmap(clip=rect, dpi=DPI)
-            if pix.height > 0 and pix.width > 0:
-                pix.save(image_filename)
-                return image_filename.replace("\\", "/")
+        else:
+            return ""
+        if pix.height <= 0 or pix.width <= 0:
+            return ""
+
+        if write_images is True:
+            filename = os.path.basename(page.parent.name).replace(" ", "-")
+            image_filename = os.path.join(
+                IMG_PATH, f"{filename}-{page.number}-{i}.{IMG_EXTENSION}"
+            )
+            pix.save(image_filename)  # write the file that the name refers to
+            return image_filename.replace("\\", "/")
+        elif embed_images is True:
+            # make a base64 encoded string of the image
+            data = b2a_base64(pix.tobytes(IMG_EXTENSION)).decode()
+            data = f"data:image/{IMG_EXTENSION};base64," + data
+            return data
         return ""
 
     def write_text(
@@ -380,6 +408,9 @@ def write_text(
 
         tab_rects0 = list(tab_rects.values())
         img_rects0 = list(img_rects.values())
+        line_rects.extend(
+            [l[0] for l in nlines if not intersects_rects(l[0], tab_rects0)]
+        )  # store line rectangles
 
         prev_lrect = None  # previous line rectangle
         prev_bno = -1  # previous block number of line
@@ -405,13 +436,19 @@ def write_text(
                 key=lambda j: (j[1].y1, j[1].x0),
             ):
                 out_string += "\n" + tabs[i].to_markdown(clean=False) + "\n"
-                if EXTRACT_WORDS:  # determine raw line rects within this table
-                    line_rects.extend(
-                        [
-                            pymupdf.Rect(rl[0])
-                            for rl in get_raw_lines(textpage, clip=tab_rects[i])
-                        ]
+                if EXTRACT_WORDS:
+                    # for "words" extraction, add table cells as line rects
+                    cells = sorted(
+                        set(
+                            [
+                                pymupdf.Rect(c)
+                                for c in tabs[i].header.cells + tabs[i].cells
+                                if c is not None
+                            ]
+                        ),
+                        key=lambda c: (c.y1, c.x0),
                     )
+                    line_rects.extend(cells)
                 del tab_rects[i]
 
             # ------------------------------------------------------------
@@ -566,28 +603,40 @@ def output_tables(tabs, text_rect, tab_rects, line_rects, textpage):
                 key=lambda j: (j[1].y1, j[1].x0),
             ):
                 this_md += tabs[i].to_markdown(clean=False)
-                if EXTRACT_WORDS:  # determine raw line rects within this table
-                    line_rects.extend(
-                        [
-                            pymupdf.Rect(rl[0])
-                            for rl in get_raw_lines(textpage, clip=tab_rects[i])
-                        ]
+                if EXTRACT_WORDS:
+                    # for "words" extraction, add table cells as line rects
+                    cells = sorted(
+                        set(
+                            [
+                                pymupdf.Rect(c)
+                                for c in tabs[i].header.cells + tabs[i].cells
+                                if c is not None
+                            ]
+                        ),
+                        key=lambda c: (c.y1, c.x0),
                     )
+                    line_rects.extend(cells)
                 del tab_rects[i]  # do not touch this table twice
 
-        else:  # output all remaining table
+        else:  # output all remaining tables
             for i, trect in sorted(
                 tab_rects.items(),
                 key=lambda j: (j[1].y1, j[1].x0),
             ):
                 this_md += tabs[i].to_markdown(clean=False)
-                if EXTRACT_WORDS:  # determine raw line rects within this table
-                    line_rects.extend(
-                        [
-                            pymupdf.Rect(rl[0])
-                            for rl in get_raw_lines(textpage, clip=tab_rects[i])
-                        ]
+                if EXTRACT_WORDS:
+                    # for "words" extraction, add table cells as line rects
+                    cells = sorted(
+                        set(
+                            [
+                                pymupdf.Rect(c)
+                                for c in tabs[i].header.cells + tabs[i].cells
+                                if c is not None
+                            ]
+                        ),
+                        key=lambda c: (c.y1, c.x0),
                     )
+                    line_rects.extend(cells)
                 del tab_rects[i]  # do not touch this table twice
         return this_md
 
@@ -653,6 +702,38 @@ def get_metadata(doc, pno):
         meta["page"] = pno + 1
         return meta
 
+    def sort_words(words):
+        """Reorder words such that each text line reads left to right.
+
+        Only consecutive words are compared: a word joins the current line
+        if its top or its bottom coordinate differs by at most 3 from that
+        of the current line rectangle. Each line is sorted by left coordinate.
+
+        Args:
+            words: list of word items. Items 0 to 3 of each word are used as
+                its bbox coordinates (x0, y0, x1, y1).
+
+        Returns:
+            A new list with the reordered words (empty if 'words' is empty).
+        """
+        if not words:  # e.g. a line rectangle or table cell without words
+            return []
+        nwords = []
+        line = [words[0]]
+        lrect = pymupdf.Rect(words[0][:4])
+        for w in words[1:]:
+            if abs(w[1] - lrect.y0) <= 3 or abs(w[3] - lrect.y1) <= 3:
+                line.append(w)
+                lrect |= w[:4]
+            else:
+                line.sort(key=lambda w: w[0])
+                nwords.extend(line)
+                line = [w]
+                lrect = pymupdf.Rect(w[:4])
+        line.sort(key=lambda w: w[0])
+        nwords.extend(line)
+        return nwords
+
     def get_page_output(doc, pno, margins, textflags):
         """Process one page.
 
@@ -801,17 +867,17 @@ def get_page_output(doc, pno, margins, textflags):
         if EXTRACT_WORDS is True:
             # output words in sequence compliant with Markdown text
             rawwords = textpage.extractWORDS()
+            rawwords.sort(key=lambda w: (w[3], w[0]))
             words = []
             for lrect in line_rects:
                 lwords = []
                 for w in rawwords:
                     wrect = pymupdf.Rect(w[:4])
                     if wrect in lrect:
-                        wrect.y0 = lrect.y0  # set upper coord to line
-                        wrect.y1 = lrect.y1  # set lower coord to line
-                        lwords.append(list(wrect) + list(w[4:]))
+                        lwords.append(w)
                 # append sorted words of this line
-                words.extend(sorted(lwords, key=lambda w: w[0]))
+                # words.extend(sorted(lwords, key=lambda w: w[0]))
+                words.extend(sort_words(lwords))
 
             # remove word duplicates without spoiling the sequence
             # duplicates may occur for multiple reasons
diff --git a/pymupdf4llm/pymupdf4llm/llama/pdf_markdown_reader.py b/pymupdf4llm/pymupdf4llm/llama/pdf_markdown_reader.py
index ec85dc7a..48c40894 100644
--- a/pymupdf4llm/pymupdf4llm/llama/pdf_markdown_reader.py
+++ b/pymupdf4llm/pymupdf4llm/llama/pdf_markdown_reader.py
@@ -2,9 +2,7 @@
 from typing import Any, Callable, Dict, List, Optional, Union
 
 import pymupdf
-
 from pymupdf import Document as FitzDocument
-
 from pymupdf4llm import IdentifyHeaders, to_markdown
 
 try:
@@ -23,7 +21,9 @@ class PDFMarkdownReader(BaseReader):
 
     def __init__(
         self,
-        meta_filter: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
+        meta_filter: Optional[
+            Callable[[Dict[str, Any]], Dict[str, Any]]
+        ] = None,
     ):
         self.meta_filter = meta_filter
 
@@ -78,7 +78,9 @@ def _process_doc_page(
         hdr_info: IdentifyHeaders,
     ):
         """Processes a single page of a PDF document."""
-        extra_info = self._process_doc_meta(doc, file_path, page_number, extra_info)
+        extra_info = self._process_doc_meta(
+            doc, file_path, page_number, extra_info
+        )
 
         if self.meta_filter:
             extra_info = self.meta_filter(extra_info)
diff --git a/pymupdf4llm/setup.py b/pymupdf4llm/setup.py
index 6b4fe33a..af047b36 100644
--- a/pymupdf4llm/setup.py
+++ b/pymupdf4llm/setup.py
@@ -17,7 +17,7 @@
 
 setuptools.setup(
     name="pymupdf4llm",
-    version="0.0.14",
+    version="0.0.15",
     author="Artifex",
     author_email="support@artifex.com",
     description="PyMuPDF Utilities for LLM/RAG",