/*
 * Decompiled with CFR 0.152.
 */
package org.jabref.logic.importer.fileformat;

import com.google.common.annotations.VisibleForTesting;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.jabref.logic.citationkeypattern.CitationKeyGenerator;
import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences;
import org.jabref.logic.importer.Importer;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.FileType;
import org.jabref.logic.util.StandardFileType;
import org.jabref.logic.xmp.EncryptedPdfsNotSupportedException;
import org.jabref.logic.xmp.XmpUtilReader;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.Date;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Importer that reads a numbered reference list ({@code [1] ...}, {@code [2] ...}) from the last page of a PDF
 * and turns every reference into a {@link BibEntry}.
 *
 * <p>Each reference is parsed heuristically with the regular expressions declared in this class: known parts
 * (DOI, year, pages, month, volume, number, authors, title) are cut out of the reference text one after another,
 * and whatever is left is stored as journal, note, or book title. The untouched reference text is always kept in
 * the {@link StandardField#COMMENT} field.
 */
public class BibliographyFromPdfImporter
extends Importer {
    private static final Logger LOGGER = LoggerFactory.getLogger(BibliographyFromPdfImporter.class);

    /**
     * A reference: {@code [<digits>]} followed by any text (line breaks included) up to the next {@code [} or the
     * end of the input. Group 1 is the number, group 2 the text.
     */
    private static final Pattern REFERENCE_PATTERN = Pattern.compile("\\[(\\d+)\\](.*?)(?=\\[|$)", Pattern.DOTALL);

    /** A four-digit year at the very end, e.g. {@code ", 2020."}. */
    private static final Pattern YEAR_AT_END = Pattern.compile(", (\\d{4})\\.$");

    /** A page range, e.g. {@code ", pp. 12-34."}; group 2 is the text following it. */
    private static final Pattern PAGES = Pattern.compile(", pp\\. (\\d+--?\\d+)\\.?(.*)");

    /** A single page, e.g. {@code ", p. 12"}; group 2 is the text following it. */
    private static final Pattern PAGE = Pattern.compile(", p\\. (\\d+)(.*)");

    /** A month range with a number, e.g. {@code ", Jan-Feb. 2020"}; group 1 is the first month, group 2 the number. */
    private static final Pattern MONTH_RANGE_AND_YEAR = Pattern.compile(", ([A-Z][a-z]{2,7}\\.?)-[A-Z][a-z]{2,7}\\.? (\\d+)(.*)");

    /** A capitalized word followed by a number, e.g. {@code ", Jan. 2020"}; group 2 is the text following it. */
    private static final Pattern MONTH_AND_YEAR = Pattern.compile(", ([A-Z][a-z]{2,7}\\.? \\d+),? ?(.*)");

    /** A volume, e.g. {@code ", vol. 7"}; group 2 is the text following it. */
    private static final Pattern VOLUME = Pattern.compile(", vol\\. (\\d+)(.*)");

    /** An issue number, e.g. {@code ", no. 3"}; group 2 is the text following it. */
    private static final Pattern NO = Pattern.compile(", no\\. (\\d+)(.*)");

    /** Authors followed by a title in typographic double quotes, anchored at the start of the reference. */
    private static final Pattern AUTHORS_AND_TITLE_AT_BEGINNING = Pattern.compile("^([^\u201c]+), \u201c(.*?)\u201d, ");

    /** A title in typographic double quotes anywhere in the reference; group 2 is the text following it. */
    private static final Pattern TITLE = Pattern.compile("\u201c(.*?)\u201d, (.*)");

    /** Leading phrases that are dropped from the remaining text and mark the entry as {@code InProceedings}. */
    private static final List<String> PRESENTATION_PREFIXES = List.of("presented at", "to be presented at");

    private final CitationKeyPatternPreferences citationKeyPatternPreferences;

    /**
     * @param citationKeyPatternPreferences preferences handed to the {@link CitationKeyGenerator} that creates
     *                                      the citation keys of the imported entries
     */
    public BibliographyFromPdfImporter(CitationKeyPatternPreferences citationKeyPatternPreferences) {
        this.citationKeyPatternPreferences = citationKeyPatternPreferences;
    }

    /**
     * Checks whether the first line of the input starts with the marker {@code %PDF}.
     *
     * @param input reader positioned at the start of the file
     * @return {@code true} if the first line starts with {@code %PDF}; {@code false} otherwise, including for
     *         empty input
     * @throws IOException if reading the first line fails
     */
    @Override
    public boolean isRecognizedFormat(BufferedReader input) throws IOException {
        // readLine() returns null when the stream is already at its end (e.g. an empty file).
        String firstLine = input.readLine();
        return firstLine != null && firstLine.startsWith("%PDF");
    }

    /**
     * Not supported: this importer works on the PDF file itself. Use {@link #importDatabase(Path)} instead.
     *
     * @throws NullPointerException          if {@code reader} is {@code null}
     * @throws UnsupportedOperationException always, for a non-null {@code reader}
     */
    @Override
    public ParserResult importDatabase(BufferedReader reader) throws IOException {
        Objects.requireNonNull(reader);
        throw new UnsupportedOperationException("BibliographyFromPdfImporter does not support importDatabase(BufferedReader reader). Instead use importDatabase(Path filePath).");
    }

    @Override
    public String getName() {
        return "Bibliography from PDF";
    }

    @Override
    public String getDescription() {
        return "Reads the references from the 'References' section of a PDF file.";
    }

    @Override
    public FileType getFileType() {
        return StandardFileType.PDF;
    }

    /**
     * Reads the last page of the given PDF, parses every numbered reference on it, and generates a citation key
     * for each resulting entry.
     *
     * @param filePath path of the PDF file
     * @return the parsed entries, or an error result if the PDF is encrypted in an unsupported way or cannot be read
     */
    @Override
    public ParserResult importDatabase(Path filePath) {
        List<BibEntry> result;
        try (PDDocument document = new XmpUtilReader().loadWithAutomaticDecryption(filePath)) {
            String contents = getLastPageContents(document);
            result = getEntriesFromPDFContent(contents);
        } catch (EncryptedPdfsNotSupportedException e) {
            return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported."));
        } catch (IOException exception) {
            return ParserResult.fromError(exception);
        }

        ParserResult parserResult = new ParserResult(result);
        CitationKeyGenerator citationKeyGenerator = new CitationKeyGenerator(parserResult.getDatabaseContext(), citationKeyPatternPreferences);
        parserResult.getDatabase().getEntries().forEach(citationKeyGenerator::generateAndSetKey);
        return parserResult;
    }

    /**
     * Finds all references in the given page text and parses each of them.
     *
     * @param contents text of the page holding the reference list
     * @return one entry per reference found, in order of appearance
     */
    private List<BibEntry> getEntriesFromPDFContent(String contents) {
        return REFERENCE_PATTERN.matcher(contents)
                                .results()
                                // A reference may span several lines; join them into a single line first.
                                .map(match -> parseReference(match.group(1), match.group(2).replaceAll("\\r?\\n", " ").trim()))
                                .toList();
    }

    /**
     * Extracts the text of the last page of the document.
     *
     * @param document the opened PDF document
     * @return the text written by {@link PDFTextStripper} for the last page
     * @throws IOException if the text extraction fails
     */
    private String getLastPageContents(PDDocument document) throws IOException {
        PDFTextStripper stripper = new PDFTextStripper();
        int lastPage = document.getNumberOfPages();
        stripper.setStartPage(lastPage);
        stripper.setEndPage(lastPage);

        StringWriter writer = new StringWriter();
        stripper.writeText(document, writer);
        return writer.toString();
    }

    /**
     * Parses a single reference into an entry.
     *
     * <p>The known parts are removed from the reference text step by step; each helper returns the text that is
     * still unprocessed. The entry starts as {@code Article} and is retyped by the later steps where applicable.
     *
     * @param number    the reference number, without the surrounding brackets
     * @param reference the reference text on a single line, without the leading {@code [number]}
     * @return the entry; its comment field holds {@code "[number] reference"} exactly as passed in
     */
    @VisibleForTesting
    BibEntry parseReference(String number, String reference) {
        String originalReference = "[" + number + "] " + reference;
        BibEntry result = new BibEntry(StandardEntryType.Article);

        // Drop the dot in front of a hyphen, e.g. "Jan.-Feb." becomes "Jan-Feb."
        String remaining = reference.replace(".-", "-");

        remaining = extractDoi(remaining, result);
        remaining = extractYearAtEnd(remaining, result);
        remaining = updateEntryAndReferenceIfMatches(remaining, PAGES, result, StandardField.PAGES);
        remaining = updateEntryAndReferenceIfMatches(remaining, PAGE, result, StandardField.PAGES);
        remaining = collapseMonthRange(remaining);
        remaining = extractMonthAndYear(remaining, result);
        remaining = updateEntryAndReferenceIfMatches(remaining, VOLUME, result, StandardField.VOLUME);
        remaining = updateEntryAndReferenceIfMatches(remaining, NO, result, StandardField.NUMBER);
        remaining = extractAuthorsAndTitle(remaining, result);
        remaining = stripPresentationPrefixes(remaining, result);
        remaining = extractProceedingsTitle(remaining, result);
        storeRemainder(remaining, result);

        // storeRemainder consumes all text that is left, so only the original reference is recorded here.
        result.setField(StandardField.COMMENT, originalReference);
        return result;
    }

    /** Stores everything after the first {@code doi:} (spaces removed) as DOI and returns the text before it. */
    private static String extractDoi(String reference, BibEntry result) {
        int pos = reference.indexOf("doi:");
        if (pos < 0) {
            return reference;
        }
        String doi = reference.substring(pos + "doi:".length()).trim().replace(" ", "");
        result.setField(StandardField.DOI, doi);
        return reference.substring(0, pos).trim();
    }

    /** Stores a year matching {@link #YEAR_AT_END} and returns the text before it. */
    private static String extractYearAtEnd(String reference, BibEntry result) {
        Matcher matcher = YEAR_AT_END.matcher(reference);
        if (!matcher.find()) {
            return reference;
        }
        result.setField(StandardField.YEAR, matcher.group(1));
        return reference.substring(0, matcher.start()).trim();
    }

    /** Reduces a month range such as {@code ", Jan-Feb. 2020"} to its first month: {@code ", Jan 2020"}. */
    private static String collapseMonthRange(String reference) {
        Matcher matcher = MONTH_RANGE_AND_YEAR.matcher(reference);
        if (!matcher.find()) {
            return reference;
        }
        return reference.substring(0, matcher.start()) + ", " + matcher.group(1) + " " + matcher.group(2) + matcher.group(3);
    }

    /**
     * Stores year and month if the text matching {@link #MONTH_AND_YEAR} can be parsed as a date, and returns the
     * reference without that text. The reference is returned unchanged if there is no match or no parsable date.
     */
    private static String extractMonthAndYear(String reference, BibEntry result) {
        Matcher matcher = MONTH_AND_YEAR.matcher(reference);
        if (!matcher.find()) {
            return reference;
        }
        Optional<Date> parsedDate = Date.parse(matcher.group(1));
        if (parsedDate.isEmpty()) {
            return reference;
        }

        Date date = parsedDate.get();
        date.getYear().ifPresent(year -> result.setField(StandardField.YEAR, year.toString()));
        date.getMonth().ifPresent(month -> result.setField(StandardField.MONTH, month.getJabRefFormat()));

        String prefix = reference.substring(0, matcher.start()).trim();
        String suffix = matcher.group(2);
        // Re-attach the text after the date with a comma; a lone "." is dropped.
        if (suffix.isEmpty() || ".".equals(suffix)) {
            return prefix;
        }
        return prefix + ", " + suffix.replaceAll("^\\. ", "");
    }

    /**
     * Stores authors and title if the reference starts with {@code authors, "title", }; otherwise stores only a
     * quoted title if one is found. Returns the text that is left.
     */
    private static String extractAuthorsAndTitle(String reference, BibEntry result) {
        Matcher matcher = AUTHORS_AND_TITLE_AT_BEGINNING.matcher(reference);
        if (!matcher.find()) {
            return updateEntryAndReferenceIfMatches(reference, TITLE, result, StandardField.TITLE);
        }
        String authors = removeHyphenationAndExpandEtAl(matcher.group(1));
        result.setField(StandardField.AUTHOR, AuthorList.fixAuthorFirstNameFirst(authors));
        result.setField(StandardField.TITLE, removeHyphenationAndExpandEtAl(matcher.group(2)));
        return reference.substring(matcher.end()).trim();
    }

    /** Removes every {@code "- "} and replaces {@code "et al"} / {@code "et al."} by {@code "and others"}. */
    private static String removeHyphenationAndExpandEtAl(String text) {
        return text.replace("- ", "").replaceAll("et al\\.?", "and others");
    }

    /**
     * Removes a leading phrase from {@link #PRESENTATION_PREFIXES}; if one was found, the entry becomes
     * {@code InProceedings}.
     */
    private static String stripPresentationPrefixes(String reference, BibEntry result) {
        String remaining = reference;
        for (String prefix : PRESENTATION_PREFIXES) {
            if (remaining.startsWith(prefix)) {
                remaining = remaining.substring(prefix.length()).trim();
                result.setType(StandardEntryType.InProceedings);
            }
        }
        return remaining;
    }

    /**
     * If the text starts with {@code "in Proc."} or contains {@code "Workshop"}, stores all of it as book title,
     * marks the entry as {@code InProceedings}, and returns an empty string. Otherwise returns the text unchanged.
     */
    private static String extractProceedingsTitle(String reference, BibEntry result) {
        boolean startsWithInProc = reference.startsWith("in Proc.");
        boolean containsWorkshop = reference.contains("Workshop");
        if (!startsWithInProc && !containsWorkshop) {
            return reference;
        }
        // Skip the leading "in " so that the book title starts with "Proc."
        int beginIndex = startsWithInProc ? "in ".length() : 0;
        result.setField(StandardField.BOOKTITLE, reference.substring(beginIndex).replace("- ", "").trim());
        result.setType(StandardEntryType.InProceedings);
        return "";
    }

    /**
     * Stores the text that no other step consumed. Non-empty text without a comma becomes a note (if it ends with
     * {@code " Note"} or {@code " note"}; the entry is then a {@code TechReport}) or the journal. In every other
     * case, including empty text, the entry becomes {@code InProceedings} and the text is appended to the book title.
     */
    private static void storeRemainder(String reference, BibEntry result) {
        String rest = reference.trim().replace("- ", "").replaceAll("\\.$", "");

        if (!rest.isEmpty() && !rest.contains(",")) {
            if (rest.endsWith(" Note") || rest.endsWith(" note")) {
                result.setField(StandardField.NOTE, rest);
                result.setType(StandardEntryType.TechReport);
            } else {
                result.setField(StandardField.JOURNAL, rest.replace("- ", ""));
            }
            return;
        }

        LOGGER.debug("InProceedings fallback used for current state of handled string {}", rest);
        result.setType(StandardEntryType.InProceedings);
        if (result.hasField(StandardField.BOOKTITLE)) {
            String oldTitle = result.getField(StandardField.BOOKTITLE).get();
            result.setField(StandardField.BOOKTITLE, oldTitle + rest);
        } else {
            result.setField(StandardField.BOOKTITLE, rest);
        }
    }

    /**
     * If the pattern is found, stores group 1 (with every {@code "- "} removed) in the given field and returns the
     * reference without the matched text.
     *
     * @param reference the text still to be processed
     * @param pattern   pattern with two groups: group 1 is the field value, group 2 the text after it
     * @param result    entry that receives the field
     * @param field     field to set
     * @return the text before the match, trimmed, followed by a space and group 2 if group 2 is not empty;
     *         the unchanged reference if the pattern is not found
     */
    private static String updateEntryAndReferenceIfMatches(String reference, Pattern pattern, BibEntry result, Field field) {
        Matcher matcher = pattern.matcher(reference);
        if (!matcher.find()) {
            return reference;
        }
        result.setField(field, matcher.group(1).replace("- ", ""));
        String suffix = matcher.group(2);
        if (!suffix.isEmpty()) {
            suffix = " " + suffix;
        }
        return reference.substring(0, matcher.start()).trim() + suffix;
    }
}

