SparkFiles.get
Get the absolute path of a file added through SparkContext.addFile() or SparkContext.addPyFile().
SparkContext.addFile()
SparkContext.addPyFile()
New in version 0.7.0.
the name of a file that has been added to resources
the absolute path of the file
See also
SparkFiles.getRootDirectory()
SparkContext.listFiles()
Examples
>>> import os
>>> import tempfile
>>> from pyspark import SparkFiles
>>> with tempfile.TemporaryDirectory() as d:
...     path1 = os.path.join(d, "test.txt")
...     with open(path1, "w") as f:
...         _ = f.write("100")
...
...     sc.addFile(path1)
...     file_list1 = sorted(sc.listFiles)
...
...     def func1(iterator):
...         path = SparkFiles.get("test.txt")
...         assert path.startswith(SparkFiles.getRootDirectory())
...         return [path]
...
...     path_list1 = sc.parallelize([1, 2, 3, 4]).mapPartitions(func1).collect()
...
...     path2 = os.path.join(d, "test.py")
...     with open(path2, "w") as f:
...         _ = f.write("import pyspark")
...
...     # py files
...     sc.addPyFile(path2)
...     file_list2 = sorted(sc.listFiles)
...
...     def func2(iterator):
...         path = SparkFiles.get("test.py")
...         assert path.startswith(SparkFiles.getRootDirectory())
...         return [path]
...
...     path_list2 = sc.parallelize([1, 2, 3, 4]).mapPartitions(func2).collect()
>>> file_list1
['file:/.../test.txt']
>>> set(path_list1)
{'.../test.txt'}
>>> file_list2
['file:/.../test.py', 'file:/.../test.txt']
>>> set(path_list2)
{'.../test.py'}