Data Import
Core Import Tools
Command-line Interface
# Identify importable files: check the files under ~/Downloads against the importers configured in config.py
bean-identify config.py ~/Downloads
# Extract transactions; the output is redirected into extracted.beancount
# (-e names the existing ledger -- presumably used to spot duplicates; confirm with `bean-extract --help`)
bean-extract -e ledger.beancount config.py ~/Downloads > extracted.beancount
# File documents from ~/Downloads (-o presumably sets the destination root, here ~/documents; confirm with `bean-file --help`)
bean-file -o ~/documents config.py ~/Downloads
Importer Implementation
Basic Importer Structure
import datetime
import os

from beancount.core import data
from beancount.ingest import importer
class CustomImporter(importer.ImporterProtocol):
    """Skeleton importer that recognizes CSV files.

    Args:
        account_root: Account name returned by ``file_account()``, i.e. the
            account under which imported documents are filed.
    """

    def __init__(self, account_root):
        self.account_root = account_root

    def identify(self, file):
        """Return true if this importer can handle the file."""
        return file.name.endswith('.csv')

    def extract(self, file, existing_entries=None):
        """Extract Beancount directives from file.

        Args:
            file: The file object to read from.
            existing_entries: Optional previously-imported entries. Accepted
                so the method can be called with or without them; this
                skeleton does not use them.

        Returns:
            A list of directives (empty in this skeleton).
        """
        entries = []
        # Process file and create entries
        return entries

    def file_account(self, file=None):
        """Return account for filing.

        ``file`` is accepted (and ignored) so the method works both when it
        is called with the file being filed and when it is called without
        arguments.
        """
        return self.account_root

    def file_name(self, file):
        """Return desired filed filename.

        Only the final path component of ``file.name`` is used, so the
        result never contains directory separators.
        """
        return 'renamed-{}'.format(os.path.basename(file.name))

    def file_date(self, file):
        """Return the date to associate with the file.

        Placeholder: returns today's date. Replace with logic that extracts
        the date from the file contents.
        """
        return datetime.date.today()
Configuration Setup
#!/usr/bin/env python3
from importers import bank, investment, credit
# Importer instances, one per institution/account. The list order is kept
# exactly as configured: bank, investment, credit.
CONFIG = [
    # Checking account.
    bank.Importer(account='Assets:US:Bank:Checking', currency='USD'),
    # Investment account; commissions are booked to a separate expense account.
    investment.Importer(
        account='Assets:US:Investment',
        commission_account='Expenses:Fees:Commission',
    ),
    # Credit card.
    credit.Importer(account='Liabilities:US:Credit', currency='USD'),
]
Testing Framework
Regression Test Setup
from beancount.ingest import regression
# NOTE(review): this snippet relies on `unittest` being imported by the
# enclosing module; only `regression` is imported alongside it here.
class TestImporter(unittest.TestCase):
    """Regression tests for a CSV importer."""

    @regression.check_file(account="Assets:Test", regexp_mime="text/csv")
    def test_basic(self, importer, file):
        """Extracting from a sample CSV file must yield at least one entry."""
        # `importer` and `file` are presumably supplied by the check_file
        # decorator -- confirm against the regression module.
        extracted = importer.extract(file)
        self.assertTrue(extracted)
Test File Structure
importers/
├── __init__.py
├── bank/
│   ├── __init__.py
│   ├── importer.py
│   ├── test_sample.csv
│   ├── test_sample.csv.extract
│   └── test_sample.csv.file_date
File Processing
File Conversion Cache
def process_file(file):
    """Return the converted contents of *file*.

    The conversion goes through ``file.convert()`` instead of calling
    ``conversion_function`` directly, so that the result can be cached.
    """
    return file.convert(conversion_function)
def conversion_function(filename):
    """Convert the content of the file *filename*; the result is cached.

    Placeholder with no conversion logic yet, so it returns ``None``.
    """
    # Implementation goes here.
    return None
PDF Processing
def extract_pdf_text(filename, extractors=None):
    """Extract text from PDF with fallbacks.

    Each extractor is tried in order and the first successful result is
    returned. A failure in any extractor but the last moves on to the next
    one; a failure in the last extractor propagates to the caller.

    Args:
        filename: Path of the PDF file, passed unchanged to each extractor.
        extractors: Optional sequence of callables taking ``filename`` and
            returning the extracted text. Defaults to pdfminer, then
            pdftotext, then poppler.

    Returns:
        Whatever the first successful extractor returns.

    Raises:
        ValueError: If ``extractors`` is empty.
        Exception: Whatever the last extractor raises when all of them fail.
    """
    if extractors is None:
        extractors = (
            extract_with_pdfminer,
            extract_with_pdftotext,
            extract_with_poppler,
        )
    extractors = list(extractors)
    if not extractors:
        raise ValueError("at least one extractor is required")

    *fallbacks, last_resort = extractors
    for extract in fallbacks:
        try:
            return extract(filename)
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still stop the program.
            continue
    # No try/except here: if the final extractor fails, the caller sees why.
    return last_resort(filename)
Directory Organization
Recommended Structure
project/
├── documents/
│ ├── Assets/
│ ├── Liabilities/