<?php

namespace Bespin\DocumentClient\Parser;

use Bespin\DocumentType\DocumentType;
use Bespin\DocumentClient\File\PdfFile;
use Bespin\DocumentClient\DocumentType\Document;
use Bespin\DocumentClient\DocumentType\Payroll;
use Bespin\DocumentClient\DocumentType\WageTax;
use Bespin\DocumentClient\Model\ParserModelInterface;
use DateTime;
use DateTimeZone;
use Exception;

/**
 * Splits a DATEV PDF export into per-employee documents.
 *
 * Pages are recognised by the DATEV template identifiers found in their text
 * ("LOGN15" for payroll statements, "LO4722" for wage tax certificates). Every
 * page that cannot be assigned to an employee is collected into one generic
 * remainder document.
 */
class DatevParser
{
    /** Marker that directly precedes the employee number in the page text. */
    private const EMPLOYEE_NUMBER_MARKER = '*Pers.-Nr.';

    /** Timezone used for all document dates read from the page text. */
    private const TIMEZONE = 'Europe/Berlin';

    /**
     * Full German month names (prefixed with "für", spaces removed, lower case),
     * ordered January..December. Checked before the abbreviated forms.
     */
    private const LONG_MONTH_NEEDLES = [
        'fürjanuar',
        'fürfebruar',
        'fürmärz',
        'fürapril',
        'fürmai',
        'fürjuni',
        'fürjuli',
        'füraugust',
        'fürseptember',
        'füroktober',
        'fürnovember',
        'fürdezember',
    ];

    /** Abbreviated German month names, same normalisation and order as above. */
    private const SHORT_MONTH_NEEDLES = [
        'fürjan',
        'fürfeb',
        'fürmrz',
        'fürapr',
        'fürmai',
        'fürjun',
        'fürjul',
        'füraug',
        'fürsep',
        'fürokt',
        'fürnov',
        'fürdez',
    ];

    private PdfFile $pdfDocument;

    public function __construct(PdfFile $pdfDocument)
    {
        $this->pdfDocument = $pdfDocument;
    }

    /**
     * Detects which DATEV document types occur in the given text.
     *
     * @param string $content Text to inspect; when empty, the text of the wrapped PDF is used.
     *
     * @return array<int|string, DocumentType> Detected types, keyed by their enum value.
     */
    public function determineDocumentType(string $content = ''): array
    {
        if ($content === '') {
            $content = $this->pdfDocument->getText();
        }

        $result = [];
        if (str_contains($content, 'LOGN15')) {
            // That string is the template name for payroll (DATEV)
            $result[DocumentType::PAYROLL->value] = DocumentType::PAYROLL;
        }
        // NOTE(review): this checks the prefix "LO47" while getDocuments() requires
        // the full "LO4722" template name - confirm the difference is intentional.
        if (str_contains($content, 'LO47') && str_contains(strtolower($content), 'lohnsteuerbescheinigung')) {
            // That string is the template name for wage tax certification (DATEV)
            $result[DocumentType::WAGE_TAX->value] = DocumentType::WAGE_TAX;
        }

        return $result;
    }

    /**
     * Splits the PDF into pages and turns every recognised page into a typed document.
     *
     * @param ParserModelInterface $parserModel Used to look up employees by their number.
     *
     * @return Document[] Recognised payroll / wage tax documents, followed by one generic
     *                    Document holding all pages that could not be assigned (if any).
     */
    public function getDocuments(ParserModelInterface $parserModel): array
    {
        $result         = [];
        $remainingPages = [];

        foreach ($this->pdfDocument->splitDocumentGetPages() as $page) {
            $textContent = $page->getText();

            if (str_contains($textContent, 'LOGN15')) {
                $document = $this->parseLOGN15($page, $parserModel);
                if ($document === null) {
                    // NOTE(review): only this branch keys the page by its parent page
                    // number, the other branches append. Kept as-is because it is not
                    // visible here whether getPageRange() relies on the keys - confirm
                    // and unify.
                    $remainingPages[$page->getParentPageNumber()] = $page;
                    continue;
                }
            } elseif (str_contains($textContent, 'LO4722')) {
                $document = $this->parseLO4722($page, $parserModel);
            } else {
                $document = null;
            }

            if ($document !== null) {
                $result[] = $document;
            } else {
                $remainingPages[] = $page;
            }
        }

        if ($remainingPages !== []) {
            $result[] = new Document($this->pdfDocument->getPageRange($remainingPages));
        }

        return $result;
    }

    /**
     * Reads the employee number that sits between "*Pers.-Nr." and the next "*".
     *
     * @return int|null The number, or null when either delimiter is missing.
     */
    private function parseEmployeeNumber(PdfFile $pdf): ?int
    {
        $text      = $pdf->getText();
        $markerPos = stripos($text, self::EMPLOYEE_NUMBER_MARKER);
        if ($markerPos === false) {
            // Without this guard `false + 10` would silently become offset 10.
            return null;
        }

        $start = $markerPos + strlen(self::EMPLOYEE_NUMBER_MARKER);
        $end   = strpos($text, '*', $start);
        if ($end === false) {
            return null;
        }

        return (int)substr($text, $start, $end - $start);
    }

    /**
     * Lower-cases the text and removes all spaces so that needles can be matched
     * independently of the spacing of the extracted PDF text.
     */
    private function normalize(string $text): string
    {
        return strtolower(str_replace(' ', '', $text));
    }

    /**
     * Looks up the employee referenced on the page and verifies that the employee's
     * first and last name both occur in the page text.
     *
     * @param string $content Normalised page text (see normalize()).
     *
     * @return object|null The employee, or null when the number is missing, unknown
     *                     or does not belong to the names printed on the page.
     */
    private function findMatchingEmployee(PdfFile $page, ParserModelInterface $parserModel, string $content): ?object
    {
        $employeeNumber = $this->parseEmployeeNumber($page);
        if ($employeeNumber === null) {
            return null;
        }

        $employee = $parserModel->getEmployee($employeeNumber);
        if ($employee === null) {
            return null;
        }

        // The names get the same normalisation as the content; otherwise a name
        // containing a space could never be found in the space-stripped text.
        $firstName = $this->normalize((string)$employee->firstName);
        $lastName  = $this->normalize((string)$employee->lastName);

        return str_contains($content, $firstName) && str_contains($content, $lastName) ? $employee : null;
    }

    /**
     * Parses a wage tax certificate page (DATEV template LO4722).
     *
     * @return Document|null A WageTax document, or null when the page cannot be
     *                       assigned to an employee or lacks the expected markers.
     */
    private function parseLO4722(PdfFile $page, ParserModelInterface $parserModel): ?Document
    {
        $content  = $this->normalize($page->getText());
        $employee = $this->findMatchingEmployee($page, $parserModel, $content);
        if ($employee === null) {
            return null;
        }

        // A digit run followed by "8ya"; its first 14 digits are read as a
        // YmdHis timestamp. The pattern uses "(" ")" as regex delimiters.
        $matches = [];
        preg_match('(\d+8ya)', $content, $matches);
        if (count($matches) !== 1) {
            return null;
        }

        $needle = 'ausdruckderelektronischenlohnsteuerbescheinigungfür';
        $start  = strpos($content, $needle);
        if ($start === false) {
            return null;
        }

        // The four characters after the needle are the year the certificate covers.
        // createFromFormat() returns false (it does not throw) on unparsable input.
        $contentYear = DateTime::createFromFormat('!Y', substr($content, $start + strlen($needle), 4));
        if ($contentYear === false) {
            return null;
        }

        $documentDate = DateTime::createFromFormat(
            'YmdHis',
            substr($matches[0], 0, 14),
            new DateTimeZone(self::TIMEZONE)
        );
        if ($documentDate !== false) {
            $page->setDocumentDate($documentDate);
        }

        return new WageTax($page, $employee, $contentYear);
    }

    /**
     * Parses a payroll statement page (DATEV template LOGN15).
     *
     * @return Document|null A Payroll document, or null when the page cannot be
     *                       assigned to an employee or its period/date is unreadable.
     */
    private function parseLOGN15(PdfFile $page, ParserModelInterface $parserModel): ?Document
    {
        $content  = $this->normalize($page->getText());
        $employee = $this->findMatchingEmployee($page, $parserModel, $content);
        if ($employee === null) {
            return null;
        }

        $monthMatch = $this->findMonthNeedle($content);
        if ($monthMatch === null) {
            return null;
        }
        [$loBuMonth, $needle] = $monthMatch;

        [$loBuYear, $date, $suffix] = $this->getPageData($content, $needle);

        // An empty string would make DateTime fall back to "now", so reject it.
        $date = trim($date);
        if ($date === '') {
            return null;
        }

        try {
            $documentDate = new DateTime($date, new DateTimeZone(self::TIMEZONE));
        } catch (Exception) {
            return null;
        }
        $page->setDocumentDate($documentDate);

        if ((int)$loBuYear <= 0) {
            return null;
        }

        $titleDate = DateTime::createFromFormat('!Y-m-d', $loBuYear . '-' . $loBuMonth . '-01');
        if ($titleDate === false) {
            return null;
        }

        return new Payroll($page, $employee, $titleDate, $suffix);
    }

    /**
     * Finds the payroll month named in the page text.
     *
     * All full month names are tried first (January..December), then the
     * abbreviated forms in the same order.
     *
     * @param string $content Normalised page text (see normalize()).
     *
     * @return array{0: string, 1: string}|null Two-digit month and the needle that
     *                                          matched, or null when no month was found.
     */
    private function findMonthNeedle(string $content): ?array
    {
        foreach ([self::LONG_MONTH_NEEDLES, self::SHORT_MONTH_NEEDLES] as $needles) {
            foreach ($needles as $index => $needle) {
                if (str_contains($content, $needle)) {
                    return [sprintf('%02d', $index + 1), $needle];
                }
            }
        }

        return null;
    }

    /**
     * Extracts year, date and optional suffix from the line that follows $needle.
     *
     * The rest of the line is read as "<year>[(<suffix>)]<date>": the year is the
     * four characters starting at the first digit, the suffix is the text between
     * "(" and ")", and the date is the ten characters after ")" (or directly after
     * the year when there is no ")").
     *
     * @param string $content Normalised page text (see normalize()).
     * @param string $needle  Month needle that introduces the line.
     *
     * @return array{0: string, 1: string, 2: string} [year, date, suffix]; all empty
     *                                                when the needle or a digit is missing.
     */
    private function getPageData(string $content, string $needle): array
    {
        $needlePos = strpos($content, $needle);
        if ($needlePos === false) {
            return ['', '', ''];
        }

        $pos     = $needlePos + strlen($needle);
        $lineEnd = strpos($content, "\n", $pos);
        // The needle may sit on the last line without a trailing line break.
        $line = $lineEnd === false ? substr($content, $pos) : substr($content, $pos, $lineEnd - $pos);
        $line = rtrim($line, "\r");

        $matches = [];
        if (preg_match('/\d/', $line, $matches, PREG_OFFSET_CAPTURE) !== 1) {
            return ['', '', ''];
        }
        $yearPos  = $matches[0][1];
        $loBuYear = substr($line, $yearPos, 4);
        $line     = substr($line, $yearPos + 4);

        $open   = strpos($line, '(');
        $close  = strpos($line, ')');
        $suffix = '';
        if ($open !== false && $close !== false && $close > $open) {
            $suffix = substr($line, $open + 1, $close - $open - 1);
        }

        $start = $close === false ? 0 : $close + 1;
        $date  = substr($line, $start, 10);

        return [$loBuYear, $date, $suffix];
    }
}
