Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions llama-index-integrations/readers/llama-index-readers-imap/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
llama_index/_static
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
bin/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
etc/
include/
lib/
lib64/
parts/
sdist/
share/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
.ruff_cache

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints
notebooks/

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
pyvenv.cfg

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Jetbrains
.idea
modules/
*.swp

# VsCode
.vscode

# pipenv
Pipfile
Pipfile.lock

# pyright
pyrightconfig.json
21 changes: 21 additions & 0 deletions llama-index-integrations/readers/llama-index-readers-imap/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Andrea Castellini

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
17 changes: 17 additions & 0 deletions llama-index-integrations/readers/llama-index-readers-imap/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
GIT_ROOT ?= $(shell git rev-parse --show-toplevel)

# Every target below is a command, not a file to build. Declaring them phony
# keeps them running even if a file or directory with the same name exists.
.PHONY: help format lint test watch-docs

# Lists every target that carries a trailing "## description" comment.
help:	## Show all Makefile targets.
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'

format:	## Run code autoformatters (black).
	pre-commit install
	git ls-files | xargs pre-commit run black --files

lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files

test:	## Run tests via pytest.
	pytest tests

watch-docs:	## Build and watch documentation.
	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# LlamaIndex Readers Integration: IMAP

## Overview

The IMAP reader loads emails from a folder of a mailbox on an IMAP server. For each email it concatenates the sender, the first recipient, the subject and the text body into a single LlamaIndex document.

## Installation

```
pip install llama-index-readers-imap
```

## Usage

```python
from llama_index.readers.imap import ImapReader

# Create the reader (this connects and logs in to the IMAP server)
mailbox = ImapReader(
host="<MAIL HOST>",
username="<MAIL USERNAME>",
password="<MAIL PASSWORD>",
)

# Lazy load emails from the given mailbox
emails = mailbox.lazy_load_data(
folder="INBOX", # Customize the folder to read from
metadata_names=[
"uid",
"from_values",
], # Customize the metadata (date is always included). You can get the full list at https://pypi.org/project/imap-tools/#email-attributes
search_criteria=None, # By default all emails are read, customize the query following https://pypi.org/project/imap-tools/#search-criteria
save_attachment=None, # Callback function to save attachments
)
```

## Saving attachments

The `lazy_load_data` function accepts an optional `save_attachment` callback function which, if defined, is called for every attachment in the email.

Its only parameter is an `imap_tools.MailAttachment` which is described in the [official documentation](https://pypi.org/project/imap-tools/#email-attributes). It must return the path of the saved attachment as a string. Every saved attachment will be added to the Document metadata with its saved filename and the original one.

Here's a simple example of the `save_attachment` function:

```python
import os

import imap_tools


def save_attachment(attachment: imap_tools.MailAttachment) -> str:
    """Write one attachment into ./attachments and return the saved path."""
    os.makedirs("attachments", exist_ok=True)
    # The filename comes from the email and is untrusted: keep only its base
    # name so it cannot point outside the target directory.
    filename = os.path.basename(attachment.filename) or "attachment.bin"
    path = os.path.join("attachments", filename)
    # Note: attachments that share a filename overwrite each other.
    with open(path, "wb") as f:
        f.write(attachment.payload)
    return path
```

---

This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/run-llama/llama_index/tree/main/llama_index).
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from llama_index.readers.imap.base import ImapReader

__all__ = ["ImapReader"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from imap_tools import MailBox, MailAttachment
from llama_index.core.readers.base import BaseReader
from typing import Iterable, Optional, List, Dict, Any, Union, Callable
from llama_index.core.schema import Document
from imap_tools import A, O, N, H, U, AND, OR, NOT, Header, UidRange

# Types accepted by the `search_criteria` argument of `ImapReader.lazy_load_data`:
# any of the criteria classes imported from imap_tools above, or None
# (in which case the reader fetches all emails).
SearchCriteria = Union[A, O, N, H, U, AND, OR, NOT, Header, UidRange, None]


class ImapReader(BaseReader):
    """
    IMAP reader. Reads emails from an IMAP server.

    The connection is opened and authenticated in the constructor and kept on
    ``self.mailbox``. This class never closes it; call
    ``reader.mailbox.logout()`` when you are done with the reader.

    Args:
        host (str): IMAP server host
        username (str): email address
        password (str): email password

    """

    mailbox: MailBox

    def __init__(self, host: str, username: str, password: str):
        """Open the IMAP connection to ``host`` and log in."""
        self.mailbox = MailBox(host)
        self.mailbox.login(username, password)

    def lazy_load_data(
        self,
        folder: str = "INBOX",
        metadata_names: Optional[List[str]] = None,
        search_criteria: Optional[SearchCriteria] = None,
        save_attachment: Optional[Callable[[MailAttachment], str]] = None,
    ) -> Iterable[Document]:
        """
        Fetch emails from the provided mailbox, one Document per email.

        This is a generator: the folder is selected and emails are fetched
        only once iteration starts.

        Args:
            folder (str, optional): Folder where to look for emails. Defaults to "INBOX".
            metadata_names (List[str], optional): Names of the message attributes to copy
                into the Document metadata. Defaults to None. "date" is always included.
                The list passed in is not modified. Attributes that do not exist on the
                message are stored as None.
                Full list at https://pypi.org/project/imap-tools/#email-attributes
            search_criteria (SearchCriteria, optional): Search criteria. Defaults to None,
                which selects all emails.
                Documentation at https://pypi.org/project/imap-tools/#search-criteria
            save_attachment (Callable[[MailAttachment], str], optional): Save attachments
                callback, called once per attachment. Defaults to None. Must return the
                saved filename.

        Yields:
            Document: text is "From: ..., To: ..., Subject: ..., Message: ..." and metadata
                holds the requested attributes. When ``save_attachment`` is given, metadata
                also has an "attachments" list of
                ``{"filename": ..., "original_filename": ...}`` dicts.

        """
        # Work on a copy so the caller's list is never mutated, and add "date"
        # only once even if the caller already asked for it.
        names: List[str] = list(metadata_names) if metadata_names is not None else []
        if "date" not in names:
            names.append("date")

        # If no criteria are set, all emails are taken into account
        criteria = search_criteria if search_criteria is not None else A(all=True)
        self.mailbox.folder.set(folder)

        for msg in self.mailbox.fetch(criteria=criteria):
            metadata: Dict[str, Any] = {key: getattr(msg, key, None) for key in names}

            # A message may have no "To" recipients (e.g. Bcc-only mail);
            # fall back to an empty string instead of raising IndexError.
            recipient = msg.to[0] if msg.to else ""
            text = f"From: {msg.from_}, To: {recipient}, Subject: {msg.subject}, Message: {msg.text}"

            if save_attachment:
                metadata["attachments"] = [
                    {
                        "filename": save_attachment(attachment),
                        "original_filename": attachment.filename,
                    }
                    for attachment in msg.attachments
                ]

            yield Document(text=text, metadata=metadata)
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[dependency-groups]
dev = [
"ipython==8.10.0",
"jupyter>=1.0.0,<2",
"mypy==0.991",
"pre-commit==3.2.0",
"pylint==2.15.10",
"pytest==7.2.1",
"pytest-mock==3.11.1",
"ruff==0.11.11",
"types-Deprecated>=0.1.0",
"types-PyYAML>=6.0.12.12,<7",
"types-protobuf>=4.24.0.4,<5",
"types-redis==4.5.5.0",
"types-requests==2.28.11.8",
"types-setuptools==67.1.0.0",
"black[jupyter]<=23.9.1,>=23.7.0",
"codespell[toml]>=v2.2.6",
"diff-cover>=9.2.0",
"pytest-cov>=6.1.1",
]

# Package metadata for the IMAP reader integration.
[project]
name = "llama-index-readers-imap"
version = "0.2.0"
description = "llama-index readers imap integration"
authors = [{name = "Andrea Castellini", email = "[email protected]"}]
requires-python = ">=3.9,<4.0"
readme = "README.md"
license = "MIT"
maintainers = [{name = "andyts93"}]
dependencies = [
    # Provides MailBox, MailAttachment and the search-criteria classes used by ImapReader.
    # NOTE(review): no version bound is set here; consider pinning a tested range.
    "imap_tools",
    "llama-index-core>=0.13.0,<0.15",
]

[tool.codespell]
check-filenames = true
check-hidden = true
skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"

[tool.hatch.build.targets.sdist]
include = ["llama_index/"]
exclude = ["**/BUILD"]

[tool.hatch.build.targets.wheel]
include = ["llama_index/"]
exclude = ["**/BUILD"]

[tool.llamahub]
contains_example = false
import_path = "llama_index.readers.imap"

[tool.llamahub.class_authors]
ImapReader = "andyts93"

[tool.mypy]
disallow_untyped_defs = true
exclude = ["_static", "build", "examples", "notebooks", "venv"]
ignore_missing_imports = true
# Type-check against the lowest interpreter allowed by `requires-python` in [project].
python_version = "3.9"
Loading