Phase 4 – Final Receipt Reader Script

✅ Final Script: receipt_reader.py

import pytesseract
import cv2
import csv
import re
from PIL import Image

# Set the path to the Tesseract OCR executable (adjust if it is installed elsewhere).
# This is a raw string, so each path separator is written as a single backslash.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# === PHASE 1: Load image and extract raw text ===
img = cv2.imread('receipt.jpg')  # Load receipt image
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None rather
    # than raising, so fail here with a message that names the actual problem.
    raise FileNotFoundError(
        "Could not read 'receipt.jpg'. Make sure the image is in the folder "
        "you are running this script from."
    )
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
raw_text = pytesseract.image_to_string(gray)  # Extract raw text

# Split the OCR output into lines, trim each one and drop the blank ones
stripped_lines = (line.strip() for line in raw_text.split('\n'))
lines = [line for line in stripped_lines if line]

print("Cleaned Lines:")
for line in lines:
    print(f"-> {line}")

# === PHASE 2: Extract item names and prices ===
# Lines containing any of these words are skipped instead of being parsed as items.
NON_ITEM_WORDS = ('total', 'cash', 'change', 'receipt', 'thank', 'approval', 'code')

# An item line is "<name> <amount>": the amount may be prefixed with ₦ or N
# and may be followed by a closing parenthesis at the end of the line.
ITEM_LINE_PATTERN = re.compile(r'(.+?)\s+([₦N]?\s?[\d.,]+)[\)]?$')


def parse_amount(text):
    """Convert an amount such as '₦1,500.00', 'N 250' or '12,50' to a float.

    Commas and dots are interpreted by position rather than blindly turning
    every comma into a decimal point:

    * both present: the right-most one is the decimal mark, the other is a
      grouping separator ('1,500.00' and '1.500,00' both give 1500.0);
    * only commas, each followed by exactly three digits: grouping
      separators ('1,500' gives 1500.0);
    * any other comma: decimal mark ('12,50' gives 12.5).

    Raises:
        ValueError: if what remains is not a valid number (e.g. '.' or '1.2.3').
    """
    cleaned = text.replace('₦', '').replace('N', '').strip()
    has_comma = ',' in cleaned
    has_dot = '.' in cleaned

    if has_comma and has_dot:
        if cleaned.rfind(',') > cleaned.rfind('.'):
            # European style: dots group thousands, comma is the decimal mark
            cleaned = cleaned.replace('.', '').replace(',', '.')
        else:
            cleaned = cleaned.replace(',', '')
    elif has_comma:
        head, *groups = cleaned.split(',')
        if head and all(len(group) == 3 for group in groups):
            cleaned = cleaned.replace(',', '')
        else:
            cleaned = cleaned.replace(',', '.')

    return float(cleaned)


expenses = []
for line in lines:
    line = line.strip()
    lowered = line.lower()
    if any(word in lowered for word in NON_ITEM_WORDS):
        continue

    match = ITEM_LINE_PATTERN.search(line)
    if not match:
        continue

    try:
        amount = parse_amount(match.group(2))
    except ValueError:
        # The trailing token looked like a price but is not a number; skip the line
        continue

    # Prices are stored as whole numbers, matching the original output format
    expenses.append({'item': match.group(1).strip(), 'price': round(amount)})

# === Show the results ===
print("\nStructured Data:")
for e in expenses:
    print(f"{e['item']} - ₦{e['price']}")

# === Save to CSV file ===
# newline='' lets the csv module control line endings itself. The encoding is
# explicit so that item names containing non-ASCII characters (for example a
# stray '₦' picked up by OCR) are written the same way on every platform
# instead of depending on the locale's default codec.
with open('expenses.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Item', 'Price'])
    writer.writerows([e['item'], e['price']] for e in expenses)

✅ Your Folder Should Look Like This

receipt_reader.py       ← your Python file
receipt.jpg             ← image you want to scan
expenses.csv            ← auto-created after running script

✅ How to Run It

python receipt_reader.py

You should see the cleaned lines and the extracted items printed in your terminal, and expenses.csv saved in the same folder.

Back to Home