Revision 4f69c98ae95681cf972fa6701c94dbbb28e40d80 authored by sychen on 09 September 2022, 21:36:39 UTC, committed by Dongjoon Hyun on 09 September 2022, 21:36:39 UTC
### What changes were proposed in this pull request?
Increase ORC test coverage.
[ORC-1205](https://issues.apache.org/jira/browse/ORC-1205) Size of batches in some ConvertTreeReaders should be ensured before using

### Why are the changes needed?

When spark reads an orc with type promotion, an `ArrayIndexOutOfBoundsException` may be thrown, which has been fixed in version 1.7.6 and 1.8.0.

```java
java.lang.ArrayIndexOutOfBoundsException: 1
        at org.apache.orc.impl.TreeReaderFactory$TreeReader.nextVector(TreeReaderFactory.java:387)
        at org.apache.orc.impl.TreeReaderFactory$LongTreeReader.nextVector(TreeReaderFactory.java:740)
        at org.apache.orc.impl.ConvertTreeReaderFactory$StringGroupFromAnyIntegerTreeReader.nextVector(ConvertTreeReaderFactory.java:1069)
        at org.apache.orc.impl.reader.tree.StructBatchReader.readBatchColumn(StructBatchReader.java:65)
```

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
add UT

Closes #37808 from cxzl25/SPARK-39830-3.3.

Authored-by: sychen <sychen@ctrip.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent aaa8292
Raw File
appveyor.yml
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

version: "{build}-{branch}"

shallow_clone: true

platform: x64
configuration: Debug

branches:
  only:
    - master

only_commits:
  files:
    - appveyor.yml
    - dev/appveyor-install-dependencies.ps1
    - R/
    - sql/core/src/main/scala/org/apache/spark/sql/api/r/
    - core/src/main/scala/org/apache/spark/api/r/
    - mllib/src/main/scala/org/apache/spark/ml/r/
    - core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
    - bin/*.cmd

cache:
  - C:\Users\appveyor\.m2

install:
  # Install SBT and dependencies
  - ps: .\dev\appveyor-install-dependencies.ps1
  # Required package for R unit tests. xml2 is required to use jUnit reporter in testthat.
  - cmd: Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')"
  - cmd: Rscript -e "pkg_list <- as.data.frame(installed.packages()[,c(1, 3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]"

build_script:
  # '-Djna.nosys=true' is required to avoid kernel32.dll load failure.
  # See SPARK-28759.
  # Ideally we should check the tests related to Hive in SparkR as well (SPARK-31745).
  - cmd: set SBT_MAVEN_PROFILES=-Psparkr
  - cmd: set SBT_OPTS=-Djna.nosys=true -Dfile.encoding=UTF-8 -Xms4096m -Xms4096m -XX:ReservedCodeCacheSize=128m
  - cmd: sbt package
  - cmd: set SBT_MAVEN_PROFILES=
  - cmd: set SBT_OPTS=

environment:
  NOT_CRAN: true
  # See SPARK-27848. Currently installing some dependent packages causes
  # "(converted from warning) unable to identify current timezone 'C':" for an unknown reason.
  # This environment variable works around to test SparkR against a higher version.
  R_REMOTES_NO_ERRORS_FROM_WARNINGS: true

test_script:
  - cmd: .\bin\spark-submit2.cmd --driver-java-options "-Dlog4j.configuration=file:///%CD:\=/%/R/log4j.properties" --conf spark.hadoop.fs.defaultFS="file:///" R\pkg\tests\run-all.R

notifications:
  - provider: Email
    on_build_success: false
    on_build_failure: false
    on_build_status_changed: false
back to top