diff --git a/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaParsersConfigBuildItem.java b/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaParsersConfigBuildItem.java new file mode 100644 index 000000000..69952bb6c --- /dev/null +++ b/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaParsersConfigBuildItem.java @@ -0,0 +1,21 @@ +package io.quarkus.tika.deployment; + +import java.util.List; +import java.util.Map; + +import io.quarkus.builder.item.SimpleBuildItem; +import io.quarkus.tika.runtime.TikaParserParameter; + +public final class TikaParsersConfigBuildItem extends SimpleBuildItem { + + private final Map> parsersConfig; + + public TikaParsersConfigBuildItem(Map> parsersConfig) { + this.parsersConfig = parsersConfig; + } + + public Map> getConfiguration() { + return parsersConfig; + } + +} diff --git a/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaProcessor.java b/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaProcessor.java index a6dda4d54..6ca09a9b5 100644 --- a/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaProcessor.java +++ b/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaProcessor.java @@ -2,18 +2,19 @@ package io.quarkus.tika.deployment; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.NoSuchElementException; import java.util.Optional; import java.util.Set; +import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; import org.apache.tika.detect.Detector; import org.apache.tika.detect.EncodingDetector; import org.apache.tika.parser.Parser; -import org.eclipse.microprofile.config.ConfigProvider; import io.quarkus.arc.deployment.BeanContainerBuildItem; import io.quarkus.deployment.Capabilities; @@ -28,7 +29,9 @@ 
import io.quarkus.deployment.builditem.nativeimage.NativeImageResourceBuildItem; import io.quarkus.deployment.builditem.nativeimage.RuntimeInitializedClassBuildItem; import io.quarkus.deployment.builditem.nativeimage.ServiceProviderBuildItem; import io.quarkus.deployment.util.ServiceUtil; +import io.quarkus.tika.TikaParseException; import io.quarkus.tika.runtime.TikaConfiguration; +import io.quarkus.tika.runtime.TikaParserParameter; import io.quarkus.tika.runtime.TikaRecorder; public class TikaProcessor { @@ -51,8 +54,12 @@ public class TikaProcessor { @BuildStep @Record(ExecutionTime.STATIC_INIT) - void initializeTikaParser(BeanContainerBuildItem beanContainer, TikaRecorder recorder) throws Exception { - recorder.initTikaParser(beanContainer.getValue(), config, getSupportedParserNames(config.parsers)); + TikaParsersConfigBuildItem initializeTikaParser(BeanContainerBuildItem beanContainer, TikaRecorder recorder) + throws Exception { + Map> parsersConfig = getSupportedParserConfig(config.tikaConfigPath, config.parsers, + config.parserOptions, config.parser); + recorder.initTikaParser(beanContainer.getValue(), config, parsersConfig); + return new TikaParsersConfigBuildItem(parsersConfig); } @BuildStep @@ -95,9 +102,11 @@ public class TikaProcessor { } @BuildStep - public void registerTikaProviders(BuildProducer serviceProvider) throws Exception { + public void registerTikaProviders(BuildProducer serviceProvider, + TikaParsersConfigBuildItem parserConfigItem) throws Exception { serviceProvider.produce( - new ServiceProviderBuildItem(Parser.class.getName(), getSupportedParserNames(config.parsers))); + new ServiceProviderBuildItem(Parser.class.getName(), + new ArrayList<>(parserConfigItem.getConfiguration().keySet()))); serviceProvider.produce( new ServiceProviderBuildItem(Detector.class.getName(), getProviderNames(Detector.class.getName()))); serviceProvider.produce( @@ -110,31 +119,95 @@ public class TikaProcessor { "META-INF/services/" + serviceProviderName)); } - 
static List getSupportedParserNames(Optional requiredParsers) throws Exception { + static Map> getSupportedParserConfig(Optional tikaConfigPath, + Optional requiredParsers, + Map> parserParamMaps, + Map parserAbbreviations) throws Exception { Predicate pred = p -> !NOT_NATIVE_READY_PARSERS.contains(p); List providerNames = getProviderNames(Parser.class.getName()); - if (!requiredParsers.isPresent()) { - return providerNames.stream().filter(pred).collect(Collectors.toList()); + if (tikaConfigPath.isPresent() || !requiredParsers.isPresent()) { + return providerNames.stream().filter(pred).collect(Collectors.toMap(Function.identity(), + p -> Collections. emptyList())); } else { List abbreviations = Arrays.stream(requiredParsers.get().split(",")).map(s -> s.trim()) .collect(Collectors.toList()); - Set requiredParsersFullNames = abbreviations.stream() - .map(p -> getParserNameFromConfig(p)).collect(Collectors.toSet()); + Map fullNamesAndAbbreviations = abbreviations.stream() + .collect(Collectors.toMap(p -> getParserNameFromConfig(p, parserAbbreviations), Function.identity())); - return providerNames.stream().filter(pred).filter(p -> requiredParsersFullNames.contains(p)) - .collect(Collectors.toList()); + return providerNames.stream().filter(pred).filter(p -> fullNamesAndAbbreviations.containsKey(p)) + .collect(Collectors.toMap(Function.identity(), + p -> getParserConfig(p, parserParamMaps.get(fullNamesAndAbbreviations.get(p))))); } } - private static String getParserNameFromConfig(String abbreviation) { + static List getParserConfig(String parserName, Map parserParamMap) { + List parserParams = new LinkedList<>(); + if (parserParamMap != null) { + for (Map.Entry entry : parserParamMap.entrySet()) { + String paramName = unhyphenate(entry.getKey()); + String paramType = getParserParamType(parserName, paramName); + parserParams.add(new TikaParserParameter(paramName, entry.getValue(), paramType)); + } + } + return parserParams; + } + + private static String 
getParserNameFromConfig(String abbreviation, Map parserAbbreviations) { if (PARSER_ABBREVIATIONS.containsKey(abbreviation)) { return PARSER_ABBREVIATIONS.get(abbreviation); } + + if (parserAbbreviations.containsKey(abbreviation)) { + return parserAbbreviations.get(abbreviation); + } + + throw new IllegalStateException("The custom abbreviation `" + abbreviation + + "` can not be resolved to a parser class name, please set a " + + "quarkus.tika.parser." + abbreviation + " property"); + } + + // Convert a property name such as "sort-by-position" to "sortByPosition" + private static String unhyphenate(String paramName) { + StringBuilder sb = new StringBuilder(); + String[] words = paramName.split("-"); + for (int i = 0; i < words.length; i++) { + sb.append(i > 0 ? capitalize(words[i]) : words[i]); + } + return sb.toString(); + } + + private static String capitalize(String paramName) { + char[] chars = paramName.toCharArray(); + chars[0] = Character.toUpperCase(chars[0]); + return new String(chars); + } + + // TODO: Remove the reflection code below once TikaConfig becomes capable + // of loading the parameters without the type attribute: TIKA-2944 + + private static Class loadParserClass(String parserName) { try { - return ConfigProvider.getConfig().getValue(abbreviation, String.class); - } catch (NoSuchElementException ex) { - throw new IllegalStateException("The custom abbreviation " + abbreviation - + " can not be resolved to a parser class name"); + return TikaProcessor.class.getClassLoader().loadClass(parserName); + } catch (Throwable t) { + final String errorMessage = "Parser " + parserName + " can not be loaded"; + throw new TikaParseException(errorMessage); + } + } + + private static String getParserParamType(String parserName, String paramName) { + try { + Class parserClass = loadParserClass(parserName); + String paramType = parserClass.getMethod("get" + capitalize(paramName), new Class[] {}).getReturnType() + .getSimpleName().toLowerCase(); + if
(paramType.equals(boolean.class.getSimpleName())) { + // TikaConfig Param class does not recognize 'boolean', only 'bool' + // This whole reflection code is temporary anyway + paramType = "bool"; + } + return paramType; + } catch (Throwable t) { + final String errorMessage = "Parser " + parserName + " has no " + paramName + " property"; + throw new TikaParseException(errorMessage); } } } diff --git a/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TestConfigSource.java b/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TestConfigSource.java deleted file mode 100644 index 5c2007454..000000000 --- a/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TestConfigSource.java +++ /dev/null @@ -1,24 +0,0 @@ -package io.quarkus.tika.deployment; - -import java.util.Collections; -import java.util.Map; - -import org.eclipse.microprofile.config.spi.ConfigSource; - -public class TestConfigSource implements ConfigSource { - - @Override - public Map getProperties() { - return Collections.singletonMap("opendoc", "org.apache.tika.parser.odf.OpenDocumentParser"); - } - - @Override - public String getValue(String propertyName) { - return getProperties().get(propertyName); - } - - @Override - public String getName() { - return "test-source"; - } -} diff --git a/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TikaProcessorTest.java b/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TikaProcessorTest.java index e396c5db7..6f73c2152 100644 --- a/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TikaProcessorTest.java +++ b/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TikaProcessorTest.java @@ -4,8 +4,11 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; +import java.util.Collections; import java.util.List; +import java.util.Map; 
import java.util.Optional; +import java.util.Set; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.spi.ConfigProviderResolver; @@ -14,6 +17,7 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import io.quarkus.runtime.configuration.QuarkusConfigFactory; +import io.quarkus.tika.runtime.TikaParserParameter; import io.smallrye.config.SmallRyeConfig; import io.smallrye.config.SmallRyeConfigBuilder; @@ -45,27 +49,55 @@ public class TikaProcessorTest { } @Test - public void testSupportedParserNames() throws Exception { - Optional parserNames = Optional.of("pdf"); - List names = TikaProcessor.getSupportedParserNames(parserNames); + public void testPDFParserName() throws Exception { + Set names = getParserNames(null, "pdf"); assertEquals(1, names.size()); - assertEquals("org.apache.tika.parser.pdf.PDFParser", names.get(0)); + assertTrue(names.contains("org.apache.tika.parser.pdf.PDFParser")); } @Test - public void testResolvableCustomAbbreviation() throws Exception { - Optional parserNames = Optional.of("pdf,opendoc"); - List names = TikaProcessor.getSupportedParserNames(parserNames); + public void testODFParserName() throws Exception { + Set names = getParserNames(null, "odf"); + assertEquals(1, names.size()); + assertTrue(names.contains("org.apache.tika.parser.odf.OpenDocumentParser")); + } + + @Test + public void testSupportedParserNames() throws Exception { + Set names = getParserNames(null, "pdf,odf"); assertEquals(2, names.size()); assertTrue(names.contains("org.apache.tika.parser.pdf.PDFParser")); assertTrue(names.contains("org.apache.tika.parser.odf.OpenDocumentParser")); } + @Test + public void testResolvableCustomAbbreviation() throws Exception { + Set names = getParserConfig(null, "pdf,opendoc", Collections.emptyMap(), + Collections.singletonMap("opendoc", + "org.apache.tika.parser.odf.OpenDocumentParser")).keySet(); + assertEquals(2, names.size()); + 
assertTrue(names.contains("org.apache.tika.parser.pdf.PDFParser")); + assertTrue(names.contains("org.apache.tika.parser.odf.OpenDocumentParser")); + } + + @Test + public void testPdfParserConfig() throws Exception { + Map> parserConfig = getParserConfig(null, "pdf", + Collections.singletonMap("pdf", + Collections.singletonMap("sort-by-position", "true")), + Collections.emptyMap()); + assertEquals(1, parserConfig.size()); + + String pdfParserFullName = "org.apache.tika.parser.pdf.PDFParser"; + assertEquals(1, parserConfig.get(pdfParserFullName).size()); + assertEquals("sortByPosition", parserConfig.get(pdfParserFullName).get(0).getName()); + assertEquals("true", parserConfig.get(pdfParserFullName).get(0).getValue()); + } + @Test public void testUnresolvableCustomAbbreviation() throws Exception { - Optional parserNames = Optional.of("classparser"); try { - TikaProcessor.getSupportedParserNames(parserNames); + getParserNames(null, "classparser"); fail("'classparser' is not resolvable"); } catch (IllegalStateException ex) { // expected @@ -74,8 +106,26 @@ public class TikaProcessorTest { @Test public void testAllSupportedParserNames() throws Exception { - Optional parserNames = Optional.ofNullable(null); - List names = TikaProcessor.getSupportedParserNames(parserNames); + assertEquals(69, getParserNames(null, null).size()); + } + + @Test + public void testSupportedParserNamesWithTikaConfigPath() throws Exception { + Set names = getParserNames("tika-config.xml", "pdf"); assertEquals(69, names.size()); } + + private Set getParserNames(String tikaConfigPath, String parsers) throws Exception { + return TikaProcessor.getSupportedParserConfig( + Optional.ofNullable(tikaConfigPath), Optional.ofNullable(parsers), + Collections.emptyMap(), Collections.emptyMap()).keySet(); + } + + private Map> getParserConfig(String tikaConfigPath, String parsers, + Map> parserParamMaps, + Map parserAbbreviations) throws Exception { + return TikaProcessor.getSupportedParserConfig( + 
Optional.ofNullable(tikaConfigPath), Optional.ofNullable(parsers), + parserParamMaps, parserAbbreviations); + } } diff --git a/extensions/tika/deployment/src/test/resources/META-INF/services/org.eclipse.microprofile.config.spi.ConfigSource b/extensions/tika/deployment/src/test/resources/META-INF/services/org.eclipse.microprofile.config.spi.ConfigSource deleted file mode 100644 index 9243c9d00..000000000 --- a/extensions/tika/deployment/src/test/resources/META-INF/services/org.eclipse.microprofile.config.spi.ConfigSource +++ /dev/null @@ -1 +0,0 @@ -io.quarkus.tika.deployment.TestConfigSource diff --git a/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaConfiguration.java b/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaConfiguration.java index f56177117..1069b37fa 100644 --- a/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaConfiguration.java +++ b/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaConfiguration.java @@ -1,5 +1,6 @@ package io.quarkus.tika.runtime; +import java.util.Map; import java.util.Optional; import io.quarkus.runtime.annotations.ConfigItem; @@ -26,16 +27,15 @@ public class TikaConfiguration { * property is recommended to achieve both optimizations. *

* Either the abbreviated or full parser class names can be used. - * At the moment only PDF parser can be listed using a reserved 'pdf' abbreviation. + * Only PDF and OpenDocument format parsers can be listed using the reserved 'pdf' and 'odf' abbreviations. * Custom class name abbreviations have to be used for all other parsers. * For example: * *

      * // Only PDF parser is required:
-     * tika-parsers = pdf
-     * // Only PDF and Java class parsers are required:
-     * tika-parsers = pdf,classparser
-     * classparser = org.apache.tika.parser.asm.ClassParser
+     * quarkus.tika.parsers = pdf
+     * // Only PDF and OpenDocument parsers are required:
+     * quarkus.tika.parsers = pdf,odf
      * 
* * This property will have no effect if the `tikaConfigPath' property has been set. @@ -43,6 +43,28 @@ public class TikaConfiguration { @ConfigItem public Optional parsers; + /** + * Configuration of the individual parsers. + * For example: + * + *
+     * quarkus.tika.parsers = pdf,odf
+     * quarkus.tika.parser-options.pdf.sort-by-position = true
+     */
+    @ConfigItem
+    public Map> parserOptions;
+
+    /**
+     * Full parser class name for a given parser abbreviation.
+     * For example:
+     *
+     * 
+     * quarkus.tika.parsers = classparser
+     * quarkus.tika.parser.classparser = org.apache.tika.parser.asm.ClassParser
+     */
+    @ConfigItem
+    public Map parser;
+
     /**
      * Controls how the content of the embedded documents is parsed.
      * By default it is appended to the master document content.
diff --git a/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaParserParameter.java b/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaParserParameter.java
new file mode 100644
index 000000000..057532068
--- /dev/null
+++ b/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaParserParameter.java
@@ -0,0 +1,41 @@
+package io.quarkus.tika.runtime;
+/** Name, value and type of a single Tika parser parameter, each held as a plain string. */
+public class TikaParserParameter {
+    private String name; // parameter name, e.g. "sortByPosition"
+    private String value; // parameter value as text, e.g. "true"
+    private String type; // parameter type name as text, e.g. "bool"
+
+    public TikaParserParameter() {
+        // Leaves all fields null. NOTE(review): presumably kept, together with the setters, so the recorder can re-create instances - confirm.
+    }
+
+    public TikaParserParameter(String name, String value, String type) {
+        this.name = name;
+        this.value = value;
+        this.type = type;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getType() {
+        return type;
+    }
+
+    public void setType(String type) {
+        this.type = type;
+    }
+
+    public String getValue() {
+        return value;
+    }
+
+    public void setValue(String value) {
+        this.value = value;
+    }
+}
diff --git a/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaRecorder.java b/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaRecorder.java
index b263907c9..f0330428e 100644
--- a/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaRecorder.java
+++ b/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaRecorder.java
@@ -4,6 +4,8 @@ import java.io.ByteArrayInputStream;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.parser.AutoDetectParser;
@@ -18,16 +20,17 @@ import io.quarkus.tika.TikaParser;
 @Recorder
 public class TikaRecorder {
 
-    public void initTikaParser(BeanContainer container, TikaConfiguration config, List supportedParserNames) {
-        TikaParser parser = initializeParser(config, supportedParserNames);
+    public void initTikaParser(BeanContainer container, TikaConfiguration config,
+            Map> parserConfig) { // parserConfig: one entry per parser, mapped to that parser's parameter list
+        TikaParser parser = initializeParser(config, parserConfig); // build the TikaParser from config and the per-parser parameters
         TikaParserProducer producer = container.instance(TikaParserProducer.class);
         producer.initialize(parser);
     }
 
-    private TikaParser initializeParser(TikaConfiguration config, List supportedParserNames) {
+    private TikaParser initializeParser(TikaConfiguration config, Map> parserConfig) {
         TikaConfig tikaConfig = null;
 
-        try (InputStream stream = getTikaConfigStream(config, supportedParserNames)) {
+        try (InputStream stream = getTikaConfigStream(config, parserConfig)) {
             tikaConfig = new TikaConfig(stream);
         } catch (Exception ex) {
             final String errorMessage = "Invalid tika-config.xml";
@@ -44,7 +47,8 @@ public class TikaRecorder {
         return new TikaParser(nativeParser, config.appendEmbeddedContent);
     }
 
-    private static InputStream getTikaConfigStream(TikaConfiguration config, List supportedParserNames) {
+    private static InputStream getTikaConfigStream(TikaConfiguration config,
+            Map> parserConfig) {
         // Load tika-config.xml resource
         InputStream is = null;
         if (config.tikaConfigPath.isPresent()) {
@@ -56,20 +60,35 @@ public class TikaRecorder {
                 throw new TikaParseException(errorMessage);
             }
         } else {
-            is = generateTikaConfig(supportedParserNames);
+            is = generateTikaConfig(parserConfig);
         }
         return is;
     }
 
-    private static InputStream generateTikaConfig(List supportedParserNames) {
+    private static InputStream generateTikaConfig(Map> parserConfig) { // returns the generated text as a UTF-8 byte stream
         StringBuilder sb = new StringBuilder();
         sb.append("");
         sb.append("");
-        for (String parserName : supportedParserNames) {
-            sb.append("");
+        for (Entry> parserEntry : parserConfig.entrySet()) { // one iteration per configured parser
+            sb.append("");
+            if (!parserEntry.getValue().isEmpty()) { // parameters are appended only when the parser has some
+                appendParserParameters(sb, parserEntry.getValue());
+            }
+            sb.append(""); // NOTE(review): the appended literals look empty in this view, presumably markup lost in extraction - confirm
         }
         sb.append("");
         sb.append("");
         return new ByteArrayInputStream(sb.toString().getBytes(StandardCharsets.UTF_8));
     }
+    // Appends every parameter's value to sb between fixed literals. NOTE(review): the literals look empty here, presumably markup lost in extraction - confirm.
+    private static void appendParserParameters(StringBuilder sb, List parserParams) {
+        sb.append("");
+        for (TikaParserParameter parserParam : parserParams) {
+            sb.append("");
+            sb.append(parserParam.getValue());
+            sb.append("");
+        }
+        sb.append("");
+    }
 }
diff --git a/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaEmdeddedContentResource.java b/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaEmdeddedContentResource.java
index 54a502ffb..995ecbb18 100644
--- a/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaEmdeddedContentResource.java
+++ b/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaEmdeddedContentResource.java
@@ -10,6 +10,8 @@ import javax.ws.rs.core.MediaType;
 
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.RecursiveParserWrapper;
+import org.apache.tika.parser.microsoft.OfficeParser;
+import org.apache.tika.parser.pdf.PDFParser;
 
 import io.quarkus.tika.TikaContent;
 import io.quarkus.tika.TikaParser;
@@ -18,7 +20,8 @@ import io.quarkus.tika.TikaParser;
 public class TikaEmdeddedContentResource {
 
     // Avoiding the injection, otherwise the recorded tika-config.xml intended for TikaPdfInvoiceTest is used
-    TikaParser parser = new TikaParser(new RecursiveParserWrapper(new AutoDetectParser(), true), false);
+    TikaParser parser = new TikaParser(new RecursiveParserWrapper(
+            new AutoDetectParser(new OfficeParser(), new PDFParser()), true), false);
 
     @POST
     @Path("/outerText")
diff --git a/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaParserResource.java b/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaParserResource.java
index 7ecaac0ca..d22c507a2 100644
--- a/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaParserResource.java
+++ b/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaParserResource.java
@@ -9,13 +9,17 @@ import javax.ws.rs.Produces;
 import javax.ws.rs.core.MediaType;
 
 import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.csv.TextAndCSVParser;
+import org.apache.tika.parser.odf.OpenDocumentParser;
+import org.apache.tika.parser.pdf.PDFParser;
 
 import io.quarkus.tika.TikaParser;
 
 @Path("/parse")
 public class TikaParserResource {
     // Avoiding the injection, otherwise the recorded tika-config.xml intended for TikaPdfInvoiceTest is used
-    TikaParser parser = new TikaParser(new AutoDetectParser(), true);
+    TikaParser parser = new TikaParser(
+            new AutoDetectParser(new PDFParser(), new OpenDocumentParser(), new TextAndCSVParser()), true);
 
     @POST
     @Path("/text")
diff --git a/integration-tests/tika/src/main/resources/application.properties b/integration-tests/tika/src/main/resources/application.properties
index d3eddbefb..e93410248 100644
--- a/integration-tests/tika/src/main/resources/application.properties
+++ b/integration-tests/tika/src/main/resources/application.properties
@@ -1 +1,2 @@
-quarkus.tika.tika-config-path=tika-config.xml
\ No newline at end of file
+quarkus.tika.parsers=pdf
+quarkus.tika.parser-options.pdf.sort-by-position=true
diff --git a/integration-tests/tika/src/main/resources/tika-config.xml b/integration-tests/tika/src/main/resources/tika-config.xml
deleted file mode 100644
index 7649ac562..000000000
--- a/integration-tests/tika/src/main/resources/tika-config.xml
+++ /dev/null
@@ -1,9 +0,0 @@
-
-  
-    
-      
-        true
-      
-    
-  
-
\ No newline at end of file