package de.micromata.genome.gwiki.page.attachments;

import de.micromata.genome.gwiki.utils.html.Html2TextFilter;
import java.io.InputStream;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.cyberneko.html.HTMLConfiguration;

/* loaded from: input_file:de/micromata/genome/gwiki/page/attachments/XmlTextExtractor.class */
/**
 * {@link TextExtractor} that runs the input through the NekoHTML parser with an
 * {@link Html2TextFilter} installed and returns the text the filter collected.
 */
public class XmlTextExtractor implements TextExtractor {
    /** NekoHTML property under which the document filter chain is registered. */
    private static final String FILTERS_PROPERTY = "http://cyberneko.org/html/properties/filters";

    /** Encoding handed to the parser for decoding the byte stream. */
    private static final String INPUT_ENCODING = "UTF-8";

    /**
     * Parses the given stream and returns the text gathered by the {@link Html2TextFilter}.
     *
     * @param str not used by this implementation
     * @param inputStream the bytes to parse; passed to the parser with encoding {@code UTF-8}.
     *     This method does not explicitly close the stream.
     * @return the string form of the filter's result text
     * @throws RuntimeException wrapping any exception raised while parsing or collecting the text
     */
    @Override // de.micromata.genome.gwiki.page.attachments.TextExtractor
    public String extractText(String str, InputStream inputStream) {
        // Keep the concrete type: getResultText() is declared on Html2TextFilter,
        // not on the XMLDocumentFilter interface.
        Html2TextFilter textFilter = new Html2TextFilter();

        HTMLConfiguration parserConfig = new HTMLConfiguration();
        parserConfig.setProperty(FILTERS_PROPERTY, new XMLDocumentFilter[] {textFilter});

        // No public id, system id or base system id is known for the stream.
        XMLInputSource source = new XMLInputSource(null, null, null, inputStream, INPUT_ENCODING);
        try {
            parserConfig.parse(source);
            return textFilter.getResultText().toString();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
