-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from RyogaMasaki/rewrite
Rewrite
- Loading branch information
Showing
21 changed files
with
832 additions
and
1,243 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
Language: Cpp | ||
Standard: Cpp11 | ||
BasedOnStyle: LLVM | ||
BreakBeforeBraces: Linux | ||
SpaceBeforeParens: Never | ||
TabWidth: 2 | ||
UseTab: Always | ||
AlignTrailingComments: true | ||
AllowShortIfStatementsOnASingleLine: false | ||
IndentCaseLabels: true | ||
|
||
Cpp11BracedListStyle: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,4 @@ | |
*.code-workspace | ||
bin | ||
build | ||
|
||
etc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Build script for the jstrings command-line tool.
#
# cmake_minimum_required() must come first because it establishes the policy
# defaults. The cxx_std_11 meta-feature requested below was introduced in
# CMake 3.8, so that is the real minimum version.
cmake_minimum_required(VERSION 3.8...3.28)
project(jstrings VERSION 1.1 LANGUAGES CXX)

include(CheckIncludeFiles)

# Project-wide C++ standard default; the target also requests cxx_std_11.
# The compiler itself is chosen by the user in the usual way (CXX environment
# variable or -DCMAKE_CXX_COMPILER=...).
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Default to a Release build when no configuration was requested.
# Only single-config generators honour CMAKE_BUILD_TYPE, so multi-config
# generators (which define CMAKE_CONFIGURATION_TYPES) are left alone.
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build configuration" FORCE)
endif()

# Define the executable from every source file found in src/.
# NOTE: aux_source_directory() scans only at configure time; re-run CMake
# after adding or removing a source file.
aux_source_directory("${PROJECT_SOURCE_DIR}/src" CPPFILES)
add_executable(jstrings ${CPPFILES})

# An executable has no consumers, so all usage requirements are PRIVATE.
target_include_directories(jstrings PRIVATE "${PROJECT_SOURCE_DIR}/inc")
target_compile_features(jstrings PRIVATE cxx_std_11)

# Define DEBUG for Debug builds only; the generator expression also works
# with multi-config generators.
target_compile_definitions(jstrings PRIVATE $<$<CONFIG:Debug>:DEBUG>)

# Link libpng through its imported target so include paths and transitive
# dependencies are handled, and a missing library fails at configure time.
find_package(PNG REQUIRED)
target_link_libraries(jstrings PRIVATE PNG::PNG)

install(TARGETS jstrings
  RUNTIME DESTINATION bin)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,47 @@ | ||
# jstrings | ||
A tool for finding JIS-based Japanese characters in binary data. | ||
A tool for finding JIS-based Japanese text in binary data. | ||
|
||
## Usage | ||
jstrings [options] [input] | ||
jstrings [options] [input_file] | ||
|
||
Input can be a filename or data from stdin. | ||
Input can be a filename or data from stdin. Output is sent to stdout. | ||
|
||
### Options | ||
-m number | ||
-e encoding | ||
--encoding encoding | ||
|
||
Set minimum number of characters to match as a valid string. Default: 10. | ||
Specify the encoding to use. Use one of the strings listed in parentheses below for that encoding: | ||
|
||
-e encoding | ||
* Shift-JIS (shift-jis, shiftjis, sjis) | ||
* EUC-JP (euc, euc-jp, eucjp) | ||
* Microsoft CP932 (cp932, windows932, windows31j) | ||
|
||
Specify the encoding to use. Currently, the only valid value is "shift-jis". Default: shift-jis | ||
Optional; default is Shift-JIS. | ||
|
||
-l | ||
-m number | ||
--match-length number | ||
|
||
Use little-endian order for multibyte characters | ||
Set minimum number of characters to match as a valid string. Optional; default is 10. | ||
|
||
-jisx0213 | ||
-c number | ||
--cutoff number | ||
|
||
Use JIS X 0213 character set instead of JIS X 0208 for double byte characters | ||
Limit the output to the specified number of characters for a string. This is useful for "previewing" a file which may have large blocks of junk data that happen to fall within the range of valid encoding values. Optional; default is no cutoff. | ||
|
||
## Notes | ||
## Output | ||
Data is output in its original encoding without any conversion. Other tools, such as iconv, can convert it to a more useful encoding (such as UTF-8). For example: | ||
|
||
jstrings file.bin | iconv -f SHIFT-JIS -t UTF-8 -c | ||
# for Shift-JIS | ||
jstrings file.bin | iconv -f SHIFT-JIS -t UTF-8 -c | less | ||
# for CP932 | ||
jstrings file.bin | iconv -f CP932 -t UTF-8 -c | less | ||
# for EUC-JP | ||
jstrings file.bin | iconv -f EUC-JP -t UTF-8 -c | less | ||
|
||
### To Do | ||
- Add support for other JIS encodings: CP932, EUC | ||
- Add support for JIS X 0212 for non-SJIS encodings (only EUC?) | ||
- Add option to only return strings with double-byte characters present | ||
## Building | ||
CMake is used for the build system. From the root directory: | ||
|
||
mkdir build && cd build | ||
cmake .. | ||
make | ||
sudo make install |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
/*! | ||
* \author Damian Rogers ([email protected]) | ||
* \version 1.1 | ||
* \date 2019.12.01 | ||
* \copyright MIT License | ||
*/ | ||
|
||
#ifndef ENC_CP932_H | ||
#define ENC_CP932_H | ||
|
||
#include "enc_shiftjis.h" | ||
|
||
namespace encodings | ||
{ | ||
|
||
class cp932 : public shift_jis | ||
{ | ||
public: | ||
u8 is_valid(u8 const *data); | ||
}; | ||
|
||
} // namespace encodings | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
/*! | ||
* \author Damian Rogers ([email protected]) | ||
* \version 1.1 | ||
* \date 2019.12.01 | ||
* \copyright MIT License | ||
*/ | ||
#ifndef ENC_EUCJP_H | ||
#define ENC_EUCJP_H | ||
#include "encoding.h" | ||
|
||
namespace encodings | ||
{ | ||
|
||
class euc : public encoding | ||
{ | ||
public: | ||
euc() : encoding(3){}; | ||
u8 is_valid(u8 const *data); | ||
}; | ||
|
||
} // namespace encodings | ||
#endif // ENC_EUCJP_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
/*! | ||
* \author Damian Rogers ([email protected]) | ||
* \version 1.1 | ||
* \date 2019.12.01 | ||
* \copyright MIT License | ||
*/ | ||
|
||
#ifndef ENC_SHIFTJIS_H | ||
#define ENC_SHIFTJIS_H | ||
#include "encoding.h" | ||
|
||
namespace encodings | ||
{ | ||
|
||
class shift_jis : public encoding | ||
{ | ||
public: | ||
shift_jis() : encoding(2){}; | ||
u8 is_valid(u8 const *data); | ||
}; | ||
|
||
} // namespace encodings | ||
#endif // ENC_SHIFTJIS_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/*! | ||
* \author Damian Rogers ([email protected]) | ||
* \version 1.1 | ||
* \date 2019.12.01 | ||
* \copyright MIT License | ||
*/ | ||
|
||
#ifndef ENCODING_H | ||
#define ENCODING_H | ||
#include "types.h" | ||
|
||
/*! | ||
* \brief Abstract for encoding classes | ||
*/ | ||
class encoding | ||
{ | ||
public: | ||
encoding(u8 max_seq_len) { this->max_seq_len = max_seq_len; } | ||
|
||
/*! | ||
* \brief Determines if the given bytes are a valid byte sequence for the | ||
* encoding. Returns the number of valid bytes if true. | ||
*/ | ||
virtual u8 is_valid(u8 const *data) = 0; | ||
|
||
const u8 get_max_seq_len() { return this->max_seq_len; } | ||
|
||
protected: | ||
u8 max_seq_len; | ||
}; | ||
|
||
#endif // ENCODING_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/*! | ||
* \author Damian Rogers ([email protected]) | ||
* \version 1.1 | ||
* \date 2019.12.01 | ||
* \copyright MIT License | ||
*/ | ||
|
||
// Include guard: this header defines an enum, so including it twice in one
// translation unit would otherwise be a redefinition error.
#ifndef JSTRINGS_MAIN_H
#define JSTRINGS_MAIN_H

#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iomanip>
#include <iostream>

#include "enc_cp932.h"
#include "enc_eucjp.h"
#include "enc_shiftjis.h"
#include "encoding.h"
#include "types.h"

/*!
 * \enum enctypes
 * \brief List of supported encodings
 */
enum enctypes { shift_jis, cp932, eucjp };

/*!
 * \brief Processes the program arguments; defined in the source file
 * \param argc Argument count
 * \param argv Argument strings
 *
 * NOTE(review): presumably called with the values received by main() and
 * parsed with getopt (this header includes getopt.h) -- confirm against the
 * definition.
 */
void process_args(int argc, char **argv);

/*!
 * \brief Prints the help text; the wording and destination stream are
 * determined by the definition in the source file
 */
void print_help();

#endif // JSTRINGS_MAIN_H
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/*! | ||
* \author Damian Rogers ([email protected]) | ||
* \version 1.1 | ||
* \date 2019.12.01 | ||
* \copyright MIT License | ||
*/ | ||
|
||
#ifndef TYPES_H | ||
#define TYPES_H | ||
|
||
#include <stdint.h>
#include <sys/types.h>

#include <map>
#include <memory>
#include <string>
#include <vector>
|
||
// Fixed-width unsigned integer shorthands
using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;

// Fixed-width signed integer shorthands
using s8 = int8_t;
using s16 = int16_t;
using s32 = int32_t;
using s64 = int64_t;

// Smart-pointer shorthands
template <typename T>
using uptr = std::unique_ptr<T>;

template <typename T>
using sptr = std::shared_ptr<T>;

// String-to-string map keyed by a const string
using kvmap = std::map<std::string const, std::string>;
|
||
/*!
 * \brief Plain record describing one found string: where it starts and the
 * bytes that were extracted
 */
struct found_string
{
  /*!
   * \brief Offset of the first byte of the found string, measured from the
   * start of the stream
   */
  off_t address;

  /*!
   * \brief The extracted string data
   */
  std::vector<uint8_t> data;
};
|
||
#endif |
Oops, something went wrong.