Package org.apache.sysds.runtime.util
Class HDFSTool
- java.lang.Object
-
- org.apache.sysds.runtime.util.HDFSTool
-
public class HDFSTool extends Object
-
-
Field Summary
Fields (Modifier and Type, Field, Description):
static String DFS_BLOCKSIZE
static String DFS_PERMISSIONS_ENABLED
static String DFS_REPLICATION
static String FS_DEFAULTFS
static String IO_FILE_BUFFER_SIZE
static String IO_SERIALIZATIONS
static boolean USE_BINARYBLOCK_SERIALIZATION
-
Constructor Summary
Constructors (Constructor, Description):
HDFSTool()
-
Method Summary
All Methods | Static Methods | Concrete Methods
(Modifier and Type, Method, Description):
static void addBinaryBlockSerializationFramework(org.apache.hadoop.conf.Configuration job)
static void copyFileOnHDFS(String originalDir, String newDir)
static void createDirIfNotExistOnHDFS(String dir, String permissions)
static void createDirIfNotExistOnHDFS(org.apache.hadoop.fs.Path path, String permissions)
static void deleteFileIfExistOnHDFS(String dir)
static void deleteFileIfExistOnHDFS(org.apache.hadoop.fs.Path outpath, org.apache.hadoop.mapred.JobConf job)
static void deleteFileIfExistOnLFS(org.apache.hadoop.fs.Path outpath, org.apache.hadoop.mapred.JobConf job)
static void deleteFileWithMTDIfExistOnHDFS(String fname)
static long estimateNnzBasedOnFileSize(org.apache.hadoop.fs.Path path, long rlen, long clen, int blen, double factor)
static boolean existsFileOnHDFS(String fname)
static org.apache.hadoop.fs.FileStatus[] getDirectoryListing(String fname)
static long getFilesizeOnHDFS(org.apache.hadoop.fs.Path path)
    Returns the size of a file or directory on hdfs in bytes.
static org.apache.hadoop.fs.FSDataOutputStream getHDFSDataOutputStream(String filename, boolean overwrite)
static boolean isDirectory(String fname)
static boolean isFileEmpty(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path dir)
static boolean isHDFSFileEmpty(String dir)
static void mergeIntoSingleFile(String originalDir, String newFile)
static String metaDataToString(Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint)
static boolean readBooleanFromHDFSFile(String filename)
static double[] readColumnVectorFromHDFS(String dir, Types.FileFormat fmt, long rlen, long clen, int blen)
static double readDoubleFromHDFSFile(String filename)
static long readIntegerFromHDFSFile(String filename)
static double[][] readMatrixFromHDFS(String dir, Types.FileFormat fmt, long rlen, long clen, int blen)
static Object readObjectFromHDFSFile(String filename, Types.ValueType vt)
static ScalarObject readScalarObjectFromHDFSFile(String fname, Types.ValueType vt)
static String readStringFromHDFSFile(String filename)
static void renameFileOnHDFS(String originalDir, String newDir)
static void writeBooleanToHDFS(boolean b, String filename)
static void writeDoubleToHDFS(double d, String filename)
static void writeIntToHDFS(long i, String filename)
static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics mc, Types.FileFormat fmt)
static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties)
static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint)
static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics mc, Types.FileFormat fmt, PrivacyConstraint privacyConstraint)
static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics mc, Types.FileFormat fmt)
static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties)
static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint)
static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics mc, Types.FileFormat fmt, PrivacyConstraint privacyConstraint)
static void writeObjectToHDFS(Object obj, String filename)
static void writeScalarMetaDataFile(String mtdfile, Types.ValueType vt)
static void writeScalarMetaDataFile(String mtdfile, Types.ValueType vt, PrivacyConstraint privacyConstraint)
static void writeScalarToHDFS(ScalarObject scalar, String fname)
    Helper function to write scalars to HDFS, including writing its meta data and removing CRC files in local file system
static void writeStringToHDFS(String s, String filename)
-
-
-
Field Detail
-
DFS_REPLICATION
public static final String DFS_REPLICATION
- See Also:
- Constant Field Values
-
IO_FILE_BUFFER_SIZE
public static final String IO_FILE_BUFFER_SIZE
- See Also:
- Constant Field Values
-
IO_SERIALIZATIONS
public static final String IO_SERIALIZATIONS
- See Also:
- Constant Field Values
-
DFS_BLOCKSIZE
public static final String DFS_BLOCKSIZE
- See Also:
- Constant Field Values
-
DFS_PERMISSIONS_ENABLED
public static final String DFS_PERMISSIONS_ENABLED
- See Also:
- Constant Field Values
-
FS_DEFAULTFS
public static final String FS_DEFAULTFS
- See Also:
- Constant Field Values
-
USE_BINARYBLOCK_SERIALIZATION
public static final boolean USE_BINARYBLOCK_SERIALIZATION
- See Also:
- Constant Field Values
-
-
Method Detail
-
addBinaryBlockSerializationFramework
public static void addBinaryBlockSerializationFramework(org.apache.hadoop.conf.Configuration job)
-
existsFileOnHDFS
public static boolean existsFileOnHDFS(String fname)
-
isDirectory
public static boolean isDirectory(String fname)
-
getDirectoryListing
public static org.apache.hadoop.fs.FileStatus[] getDirectoryListing(String fname)
-
deleteFileWithMTDIfExistOnHDFS
public static void deleteFileWithMTDIfExistOnHDFS(String fname) throws IOException
- Throws:
IOException
-
deleteFileIfExistOnHDFS
public static void deleteFileIfExistOnHDFS(String dir) throws IOException
- Throws:
IOException
-
deleteFileIfExistOnHDFS
public static void deleteFileIfExistOnHDFS(org.apache.hadoop.fs.Path outpath, org.apache.hadoop.mapred.JobConf job) throws IOException
- Throws:
IOException
-
deleteFileIfExistOnLFS
public static void deleteFileIfExistOnLFS(org.apache.hadoop.fs.Path outpath, org.apache.hadoop.mapred.JobConf job) throws IOException
- Throws:
IOException
-
isHDFSFileEmpty
public static boolean isHDFSFileEmpty(String dir) throws IOException
- Throws:
IOException
-
isFileEmpty
public static boolean isFileEmpty(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path dir) throws IOException
- Throws:
IOException
-
renameFileOnHDFS
public static void renameFileOnHDFS(String originalDir, String newDir) throws IOException
- Throws:
IOException
-
mergeIntoSingleFile
public static void mergeIntoSingleFile(String originalDir, String newFile) throws IOException
- Throws:
IOException
-
copyFileOnHDFS
public static void copyFileOnHDFS(String originalDir, String newDir) throws IOException
- Throws:
IOException
-
estimateNnzBasedOnFileSize
public static long estimateNnzBasedOnFileSize(org.apache.hadoop.fs.Path path, long rlen, long clen, int blen, double factor) throws IOException
- Throws:
IOException
-
getFilesizeOnHDFS
public static long getFilesizeOnHDFS(org.apache.hadoop.fs.Path path) throws IOException
Returns the size of a file or directory on HDFS in bytes.
- Parameters:
path - file system path
- Returns:
file size
- Throws:
IOException - if IOException occurs
-
readDoubleFromHDFSFile
public static double readDoubleFromHDFSFile(String filename) throws IOException
- Throws:
IOException
-
readIntegerFromHDFSFile
public static long readIntegerFromHDFSFile(String filename) throws IOException
- Throws:
IOException
-
readBooleanFromHDFSFile
public static boolean readBooleanFromHDFSFile(String filename) throws IOException
- Throws:
IOException
-
readStringFromHDFSFile
public static String readStringFromHDFSFile(String filename) throws IOException
- Throws:
IOException
-
readObjectFromHDFSFile
public static Object readObjectFromHDFSFile(String filename, Types.ValueType vt) throws IOException
- Throws:
IOException
-
readScalarObjectFromHDFSFile
public static ScalarObject readScalarObjectFromHDFSFile(String fname, Types.ValueType vt)
-
writeScalarToHDFS
public static void writeScalarToHDFS(ScalarObject scalar, String fname)
Helper function to write scalars to HDFS, including writing their metadata and removing CRC files in the local file system.
- Parameters:
scalar - scalar data object
fname - file name
-
writeDoubleToHDFS
public static void writeDoubleToHDFS(double d, String filename) throws IOException
- Throws:
IOException
-
writeIntToHDFS
public static void writeIntToHDFS(long i, String filename) throws IOException
- Throws:
IOException
-
writeBooleanToHDFS
public static void writeBooleanToHDFS(boolean b, String filename) throws IOException
- Throws:
IOException
-
writeStringToHDFS
public static void writeStringToHDFS(String s, String filename) throws IOException
- Throws:
IOException
-
writeObjectToHDFS
public static void writeObjectToHDFS(Object obj, String filename) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics mc, Types.FileFormat fmt) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics mc, Types.FileFormat fmt, PrivacyConstraint privacyConstraint) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics mc, Types.FileFormat fmt) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics mc, Types.FileFormat fmt, PrivacyConstraint privacyConstraint) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint) throws IOException
- Throws:
IOException
-
writeScalarMetaDataFile
public static void writeScalarMetaDataFile(String mtdfile, Types.ValueType vt) throws IOException
- Throws:
IOException
-
writeScalarMetaDataFile
public static void writeScalarMetaDataFile(String mtdfile, Types.ValueType vt, PrivacyConstraint privacyConstraint) throws IOException
- Throws:
IOException
-
metaDataToString
public static String metaDataToString(Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint) throws org.apache.wink.json4j.JSONException, DMLRuntimeException
- Throws:
org.apache.wink.json4j.JSONException
DMLRuntimeException
-
readMatrixFromHDFS
public static double[][] readMatrixFromHDFS(String dir, Types.FileFormat fmt, long rlen, long clen, int blen) throws IOException, DMLRuntimeException
- Throws:
IOException
DMLRuntimeException
-
readColumnVectorFromHDFS
public static double[] readColumnVectorFromHDFS(String dir, Types.FileFormat fmt, long rlen, long clen, int blen) throws IOException, DMLRuntimeException
- Throws:
IOException
DMLRuntimeException
-
createDirIfNotExistOnHDFS
public static void createDirIfNotExistOnHDFS(String dir, String permissions) throws IOException
- Throws:
IOException
-
createDirIfNotExistOnHDFS
public static void createDirIfNotExistOnHDFS(org.apache.hadoop.fs.Path path, String permissions) throws IOException
- Throws:
IOException
-
getHDFSDataOutputStream
public static org.apache.hadoop.fs.FSDataOutputStream getHDFSDataOutputStream(String filename, boolean overwrite) throws IOException
- Throws:
IOException
-
-