/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.configuration2.io;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.commons.configuration2.ex.ConfigurationDeniedException;
import org.apache.commons.io.build.AbstractSupplier;
import org.apache.commons.lang3.StringUtils;

/**
 * Abstracts services for FileLocationStrategy implementations.
 * <p>
 * Note that some FileLocationStrategy implementations use URLs internally to encode file locations.
 * </p>
 * <p>
 * As of version 2.15.0, by default, the only URL schemes allowed are {@code file} and {@code jar}. To override this default, you can either use the system
 * property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} or build a subclass of {@link AbstractFileLocationStrategy}.
 * </p>
 * <strong>Using System Properties</strong>
 * <p>
 * The system property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} String value must be a comma-separated list of schemes,
 * where the default is {@code "file,jar"}, and the complete list is {@code "file,http,https,jar"}. Whitespace around each scheme is ignored.
 * </p>
 * <strong>Using a Builder</strong>
 * <p>
 * The root builder for {@link AbstractFileLocationStrategy} is {@link AbstractBuilder} where you define allowed schemes and hosts through its setter
 * methods.
 * </p>
 * <p>
 * For example, to programmatically enable the schemes "file", "http", "https", and "jar" for all strategies, you write:
 * </p>
 * <pre>{@code
 * final PropertiesConfiguration pc = new PropertiesConfiguration();
 * pc.setIncludeListener(PropertiesConfiguration.NOOP_INCLUDE_LISTENER);
 * final FileHandler handler = new FileHandler(pc);
 * final CombinedLocationStrategy.Builder builder = new CombinedLocationStrategy.Builder()
 *     .setSchemes(new TreeSet<>(Arrays.asList("file", "http", "https", "jar")));
 * // @formatter:off
 * handler.setLocationStrategy(builder.setSubStrategies(Arrays.asList(
 *     new ProvidedURLLocationStrategy(builder),
 *     new FileSystemLocationStrategy(builder),
 *     new AbsoluteNameLocationStrategy(builder),
 *     new BasePathLocationStrategy(builder),
 *     new HomeDirectoryLocationStrategy.Builder().setEvaluateBasePath(true).getUnchecked(),
 *     new HomeDirectoryLocationStrategy.Builder().setEvaluateBasePath(false).getUnchecked(),
 *     new ClasspathLocationStrategy(builder)))
 *     .get());
 * // @formatter:on
 * handler.setBasePath(TEST_BASE_PATH);
 * handler.setFileName("include-load-url-host-unknown-exception.properties");
 * handler.load();
 * }</pre>
 *
 * @since 2.15.0
 * @see FileLocationStrategy
 */
public abstract class AbstractFileLocationStrategy implements FileLocationStrategy {

    /**
     * Builds new instances for subclasses.
     * <p>
     * As of version 2.15.0, by default, the only URL schemes allowed are {@code file} and {@code jar}. To override this default, you can either use the system
     * property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} or build a subclass of {@link AbstractFileLocationStrategy}.
     * </p>
     * <strong>Using System Properties</strong>
     * <p>
     * The system property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} String value must be a comma-separated list of schemes,
     * where the default is {@code "file,jar"}, and the complete list is {@code "file,http,https,jar"}.
     * </p>
     * <strong>Using a Builder</strong>
     * <p>
     * The root builder for {@link AbstractFileLocationStrategy} is {@link AbstractBuilder} where you define allowed schemes and hosts through its setter
     * methods.
     * </p>
     * <p>
     * See {@link AbstractFileLocationStrategy} to learn how to grant and deny URL schemes and hosts.
     * </p>
     *
     * @param <T> The type of {@link FileLocationStrategy} to build.
     * @param <B> The builder type.
     */
    public abstract static class AbstractBuilder<T extends FileLocationStrategy, B extends AbstractBuilder<T, B>> extends AbstractSupplier<T, B> {

        /**
         * Enabled URL-based hosts, empty means all are enabled. Hosts are case-insensitive.
         */
        private Set<Pattern> hosts = Collections.emptySet();

        /**
         * Enabled URL-based schemes, empty means all are enabled. Schemes are case-insensitive.
         */
        private Set<String> schemes = Collections.emptySet();

        /**
         * Constructs a new instance for subclasses.
         */
        public AbstractBuilder() {
            // empty
        }

        /**
         * Gets the enabled hosts.
         *
         * @return the enabled hosts, never {@code null}.
         */
        Set<Pattern> getHosts() {
            return hosts;
        }

        /**
         * Gets the enabled schemes.
         *
         * @return the enabled schemes, never {@code null}.
         */
        Set<String> getSchemes() {
            return schemes;
        }

        /**
         * Sets enabled URL-based hosts, empty means all are enabled. URL hosts are case-insensitive.
         * <p>
         * A URL host is converted to lower case before it is matched against these patterns.
         * </p>
         *
         * @param hosts enabled URL-based hosts, {@code null} is treated as empty.
         * @return {@code this} instance.
         */
        public B setHosts(final Set<Pattern> hosts) {
            this.hosts = hosts != null ? hosts : Collections.emptySet();
            return asThis();
        }

        /**
         * Sets enabled URL-based hosts, empty means all are enabled. URL hosts are case-insensitive.
         * <p>
         * Each regular expression is compiled with {@link Pattern#CASE_INSENSITIVE}.
         * </p>
         *
         * @param hosts Regular expressions for enabled URL-based hosts, {@code null} is treated as empty.
         * @return {@code this} instance.
         */
        public B setHostsRegEx(final Set<String> hosts) {
            if (hosts == null) {
                // Same null handling as setHosts(Set) and setSchemes(Set).
                return setHosts(null);
            }
            return setHosts(hosts.stream().map(e -> Pattern.compile(e, Pattern.CASE_INSENSITIVE)).collect(Collectors.toSet()));
        }

        /**
         * Sets enabled URL-based schemes, empty means all are enabled. URL schemes are case-insensitive.
         *
         * @param schemes enabled URL-based schemes, the default null means all schemes are allowed.
         * @return {@code this} instance.
         */
        public B setSchemes(final Set<String> schemes) {
            this.schemes = schemes != null ? schemes : Collections.emptySet();
            return asThis();
        }
    }

    /**
     * Builds new instances of T.
     *
     * @param <T> The type of {@link FileLocationStrategy} to build.
     */
    public static class StrategyBuilder<T extends FileLocationStrategy> extends AbstractBuilder<T, StrategyBuilder<T>> {

        /**
         * Creates the instance of T from this builder, never {@code null}.
         */
        private final Function<StrategyBuilder<T>, T> function;

        /**
         * Constructs a new instance.
         *
         * @param function Builds an instance of T.
         * @throws NullPointerException if {@code function} is {@code null}.
         */
        public StrategyBuilder(final Function<StrategyBuilder<T>, T> function) {
            this.function = Objects.requireNonNull(function, "function");
        }

        @Override
        public T get() {
            return function.apply(asThis());
        }
    }

    /**
     * Default schemes.
     */
    private static final String DEFAULT_SCHEMES = "file,jar";

    /**
     * The system property key {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes}.
     * <p>
     * If absent, defaults to {@code "file,jar"}.
     * </p>
     * <p>
     * For complete functionality, use {@code "file,http,https,jar"}.
     * </p>
     */
    private static final String KEY_SCHEMES = "org.apache.commons.configuration2.io.FileLocationStrategy.schemes";

    /**
     * Checks if the host is allowed.
     * <p>
     * An empty valid-set enables all hosts, and an empty or {@code null} host is always accepted. Otherwise the lower-cased host must fully match at least
     * one pattern.
     * </p>
     *
     * @param value    A URL host, may be empty or {@code null}.
     * @param validSet the host valid-set.
     * @throws ConfigurationDeniedException if the host matches none of the patterns.
     */
    private static void checkHost(final String value, final Set<Pattern> validSet) {
        final String lowerCase = StringUtils.toRootLowerCase(value);
        if (!validSet.isEmpty() && StringUtils.isNotEmpty(lowerCase) && validSet.stream().noneMatch(p -> p.matcher(lowerCase).matches())) {
            throw new ConfigurationDeniedException("URL host is not enabled: %s; must be one of %s", value, validSet);
        }
    }

    /**
     * Checks if the scheme is allowed.
     * <p>
     * An empty valid-set enables all schemes. Otherwise the scheme must equal one entry of the valid-set, ignoring case.
     * </p>
     *
     * @param value    A URL scheme, never empty or {@code null}.
     * @param validSet the scheme valid-set.
     * @throws ConfigurationDeniedException if the scheme is not in the valid-set.
     */
    private static void checkScheme(final String value, final Set<String> validSet) {
        // Compare ignoring case on both sides: the valid-set is caller-supplied and may contain entries such as "HTTP".
        if (!validSet.isEmpty() && validSet.stream().noneMatch(s -> s != null && s.equalsIgnoreCase(value))) {
            throw new ConfigurationDeniedException("URL scheme \"%s\" is not enabled, must be one of %s, override defaults with the system property \"%s\", "
                    + "complete set: \"file,http,https,jar\"", value, validSet, KEY_SCHEMES);
        }
    }

    /**
     * Validates {@code url} against the scheme and host allow-lists.
     * <p>
     * For a {@code jar:} URL, the URL embedded before the last {@code "!/"} is validated recursively; the host check applies to non-{@code jar} URLs only.
     * </p>
     *
     * @param url          the URL to check.
     * @param validSchemes the scheme valid-set.
     * @param validHosts   the host valid-set.
     * @throws ConfigurationDeniedException if the URL or any embedded URL fails the check, or a {@code jar:} URL is malformed.
     */
    static void checkUrl(final URL url, final Set<String> validSchemes, final Set<Pattern> validHosts) {
        final String scheme = url.getProtocol();
        checkScheme(scheme, validSchemes);
        if ("jar".equalsIgnoreCase(scheme)) {
            try {
                // Follows the logic of JarURLConnection#parseSpecs without the cost of opening a connection.
                final String spec = url.getFile();
                final int sep = spec.lastIndexOf("!/");
                if (sep < 0) {
                    throw new MalformedURLException("no !/ found in url spec:" + spec);
                }
                final URL inner = new URL(spec.substring(0, sep));
                checkUrl(inner, validSchemes, validHosts);
            } catch (final MalformedURLException e) {
                throw new ConfigurationDeniedException(e, "Malformed 'jar:' URL: %s", url);
            }
        } else {
            checkHost(url.getHost(), validHosts);
        }
    }

    /**
     * Reads the enabled schemes from the system property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes}, falling back to
     * {@code "file,jar"} when the property is absent.
     *
     * @return a new, insertion-ordered set of the comma-separated schemes with surrounding whitespace removed.
     */
    private static Set<String> getSchemesProperty() {
        final Set<String> set = new LinkedHashSet<>();
        for (final String scheme : System.getProperty(KEY_SCHEMES, DEFAULT_SCHEMES).split(",")) {
            // Trim so that values such as "file, jar" work. Blank tokens are kept rather than dropped, so trimming never turns
            // a non-empty set into the empty set, which would enable every scheme.
            set.add(scheme.trim());
        }
        return set;
    }

    /**
     * Enabled URL-based hosts, empty means all are enabled. Hosts are case-insensitive.
     */
    private final Set<Pattern> hosts;

    /**
     * Enabled URL-based schemes, empty means all are enabled. Schemes are case-insensitive.
     */
    private final Set<String> schemes;

    /**
     * Constructs a new instance where the enabled URL schemes are read from the system property
     * {@code "org.apache.commons.configuration2.io.FileLocationStrategy.schemes"}.
     * <p>
     * If absent, defaults to {@code "file,jar"}.
     * </p>
     * <p>
     * For complete functionality, use {@code "file,http,https,jar"}.
     * </p>
     */
    AbstractFileLocationStrategy() {
        this(getSchemesProperty());
    }

    /**
     * Constructs a new instance with the enabled schemes and hosts of the given builder.
     *
     * @param builder the source of the enabled schemes and hosts.
     * @throws NullPointerException if {@code builder} is {@code null}.
     */
    AbstractFileLocationStrategy(final AbstractBuilder<?, ?> builder) {
        Objects.requireNonNull(builder, "builder");
        this.schemes = builder.schemes;
        this.hosts = builder.hosts != null ? builder.hosts : Collections.emptySet();
    }

    /**
     * Constructs a new instance with the given enabled schemes and all hosts enabled.
     *
     * @param schemes the enabled URL-based schemes, empty means all are enabled.
     */
    AbstractFileLocationStrategy(final Set<String> schemes) {
        this.schemes = schemes;
        this.hosts = Collections.emptySet();
    }

    /**
     * Validates a URL against the enabled schemes and hosts of this instance.
     *
     * @param url the URL to check, {@code null} is returned unchecked.
     * @return the given {@code url}.
     * @throws ConfigurationDeniedException if the URL is not {@code null} and fails the check.
     */
    URL check(final URL url) {
        if (url != null) {
            checkUrl(url, schemes, hosts);
        }
        return url;
    }

    /**
     * Gets the enabled hosts.
     *
     * @return the enabled hosts.
     */
    Set<Pattern> getHosts() {
        return hosts;
    }

    /**
     * Gets the enabled schemes.
     *
     * @return the enabled schemes.
     */
    Set<String> getSchemes() {
        return schemes;
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + " [schemes=" + schemes + ", hosts=" + hosts + "]";
    }
}