Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/libgit2.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'include/git2/diff.h')
-rw-r--r--include/git2/diff.h863
1 files changed, 725 insertions, 138 deletions
diff --git a/include/git2/diff.h b/include/git2/diff.h
index bafe6268c..d9ceadf20 100644
--- a/include/git2/diff.h
+++ b/include/git2/diff.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009-2012 the libgit2 contributors
+ * Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
@@ -17,6 +17,9 @@
* @file git2/diff.h
* @brief Git tree and file differencing routines.
*
+ * Overview
+ * --------
+ *
* Calculating diffs is generally done in two phases: building a diff list
* then traversing the diff list. This makes it easier to share logic
* across the various types of diffs (tree vs tree, workdir vs index, etc.),
@@ -24,156 +27,327 @@
* such as rename detected, in between the steps. When you are done with a
* diff list object, it must be freed.
*
+ * Terminology
+ * -----------
+ *
+ * To understand the diff APIs, you should know the following terms:
+ *
+ * - A `diff` or `diff list` represents the cumulative list of differences
+ * between two snapshots of a repository (possibly filtered by a set of
+ * file name patterns). This is the `git_diff_list` object.
+ * - A `delta` is a file pair with an old and new revision. The old version
+ * may be absent if the file was just created and the new version may be
+ * absent if the file was deleted. A diff is mostly just a list of deltas.
+ * - A `binary` file / delta is a file (or pair) for which no text diffs
+ * should be generated. A diff list can contain delta entries that are
+ * binary, but no diff content will be output for those files. There is
+ * a base heuristic for binary detection and you can further tune the
+ * behavior with git attributes or diff flags and option settings.
+ * - A `hunk` is a span of modified lines in a delta along with some stable
+ * surrounding context. You can configure the amount of context and other
+ * properties of how hunks are generated. Each hunk also comes with a
+ * header that describes where it starts and ends in both the old and new
+ * versions in the delta.
+ * - A `line` is a range of characters inside a hunk. It could be a context
+ * line (i.e. in both old and new versions), an added line (i.e. only in
+ * the new version), or a removed line (i.e. only in the old version).
+ * Unfortunately, we don't know anything about the encoding of data in the
+ * file being diffed, so we cannot tell you much about the line content.
+ * Line data will not be NUL-byte terminated, however, because it will be
+ * just a span of bytes inside the larger file.
+ *
* @ingroup Git
* @{
*/
GIT_BEGIN_DECL
-enum {
+/**
+ * Flags for diff options. A combination of these flags can be passed
+ * in via the `flags` value in the `git_diff_options`.
+ */
+typedef enum {
+ /** Normal diff, the default */
GIT_DIFF_NORMAL = 0,
+ /** Reverse the sides of the diff */
GIT_DIFF_REVERSE = (1 << 0),
+ /** Treat all files as text, disabling binary attributes & detection */
GIT_DIFF_FORCE_TEXT = (1 << 1),
+ /** Ignore all whitespace */
GIT_DIFF_IGNORE_WHITESPACE = (1 << 2),
+ /** Ignore changes in amount of whitespace */
GIT_DIFF_IGNORE_WHITESPACE_CHANGE = (1 << 3),
+ /** Ignore whitespace at end of line */
GIT_DIFF_IGNORE_WHITESPACE_EOL = (1 << 4),
+ /** Exclude submodules from the diff completely */
GIT_DIFF_IGNORE_SUBMODULES = (1 << 5),
+ /** Use the "patience diff" algorithm (currently unimplemented) */
GIT_DIFF_PATIENCE = (1 << 6),
+ /** Include ignored files in the diff list */
GIT_DIFF_INCLUDE_IGNORED = (1 << 7),
+ /** Include untracked files in the diff list */
GIT_DIFF_INCLUDE_UNTRACKED = (1 << 8),
+ /** Include unmodified files in the diff list */
GIT_DIFF_INCLUDE_UNMODIFIED = (1 << 9),
+ /** Even with GIT_DIFF_INCLUDE_UNTRACKED, an entire untracked directory
+ * will be marked with only a single entry in the diff list; this flag
+ * adds all files under the directory as UNTRACKED entries, too.
+ */
GIT_DIFF_RECURSE_UNTRACKED_DIRS = (1 << 10),
-};
+ /** If the pathspec is set in the diff options, this flag means to
+ * apply it as an exact match instead of as an fnmatch pattern.
+ */
+ GIT_DIFF_DISABLE_PATHSPEC_MATCH = (1 << 11),
+ /** Use case insensitive filename comparisons */
+ GIT_DIFF_DELTAS_ARE_ICASE = (1 << 12),
+ /** When generating patch text, include the content of untracked files */
+ GIT_DIFF_INCLUDE_UNTRACKED_CONTENT = (1 << 13),
+ /** Disable updating of the `binary` flag in delta records. This is
+ * useful when iterating over a diff if you don't need hunk and data
+ * callbacks and want to avoid having to load the file completely.
+ */
+ GIT_DIFF_SKIP_BINARY_CHECK = (1 << 14),
+ /** Normally, a type change between files will be converted into a
+ * DELETED record for the old and an ADDED record for the new; this
+ * option enables the generation of TYPECHANGE delta records.
+ */
+ GIT_DIFF_INCLUDE_TYPECHANGE = (1 << 15),
+ /** Even with GIT_DIFF_INCLUDE_TYPECHANGE, blob->tree changes still
+ * generally show as a DELETED blob. This flag tries to correctly
+ * label blob->tree transitions as TYPECHANGE records with new_file's
+ * mode set to tree. Note: the tree SHA will not be available.
+ */
+ GIT_DIFF_INCLUDE_TYPECHANGE_TREES = (1 << 16),
+ /** Ignore file mode changes */
+ GIT_DIFF_IGNORE_FILEMODE = (1 << 17),
+ /** Even with GIT_DIFF_INCLUDE_IGNORED, an entire ignored directory
+ * will be marked with only a single entry in the diff list; this flag
+ * adds all files under the directory as IGNORED entries, too.
+ */
+ GIT_DIFF_RECURSE_IGNORED_DIRS = (1 << 18),
+} git_diff_option_t;
/**
- * Structure describing options about how the diff should be executed.
- *
- * Setting all values of the structure to zero will yield the default
- * values. Similarly, passing NULL for the options structure will
- * give the defaults. The default values are marked below.
+ * The diff list object that contains all individual file deltas.
*
- * @todo Most of the parameters here are not actually supported at this time.
+ * This is an opaque structure which will be allocated by one of the diff
+ * generator functions below (such as `git_diff_tree_to_tree`). You are
+ * responsible for releasing the object memory when done, using the
+ * `git_diff_list_free()` function.
*/
-typedef struct {
- uint32_t flags; /**< defaults to GIT_DIFF_NORMAL */
- uint16_t context_lines; /**< defaults to 3 */
- uint16_t interhunk_lines; /**< defaults to 3 */
- char *old_prefix; /**< defaults to "a" */
- char *new_prefix; /**< defaults to "b" */
- git_strarray pathspec; /**< defaults to show all paths */
-} git_diff_options;
+typedef struct git_diff_list git_diff_list;
/**
- * The diff list object that contains all individual file deltas.
+ * Flags for the delta object and the file objects on each side.
+ *
+ * These flags are used for both the `flags` value of the `git_diff_delta`
+ * and the flags for the `git_diff_file` objects representing the old and
+ * new sides of the delta. Values outside of this public range should be
+ * considered reserved for internal or future use.
*/
-typedef struct git_diff_list git_diff_list;
-
-enum {
- GIT_DIFF_FILE_VALID_OID = (1 << 0),
- GIT_DIFF_FILE_FREE_PATH = (1 << 1),
- GIT_DIFF_FILE_BINARY = (1 << 2),
- GIT_DIFF_FILE_NOT_BINARY = (1 << 3),
- GIT_DIFF_FILE_FREE_DATA = (1 << 4),
- GIT_DIFF_FILE_UNMAP_DATA = (1 << 5)
-};
+typedef enum {
+ GIT_DIFF_FLAG_BINARY = (1 << 0), /**< file(s) treated as binary data */
+ GIT_DIFF_FLAG_NOT_BINARY = (1 << 1), /**< file(s) treated as text data */
+ GIT_DIFF_FLAG_VALID_OID = (1 << 2), /**< `oid` value is known correct */
+} git_diff_flag_t;
/**
* What type of change is described by a git_diff_delta?
+ *
+ * `GIT_DELTA_RENAMED` and `GIT_DELTA_COPIED` will only show up if you run
+ * `git_diff_find_similar()` on the diff list object.
+ *
+ * `GIT_DELTA_TYPECHANGE` only shows up given `GIT_DIFF_INCLUDE_TYPECHANGE`
+ * in the option flags (otherwise type changes will be split into ADDED /
+ * DELETED pairs).
*/
typedef enum {
- GIT_DELTA_UNMODIFIED = 0,
- GIT_DELTA_ADDED = 1,
- GIT_DELTA_DELETED = 2,
- GIT_DELTA_MODIFIED = 3,
- GIT_DELTA_RENAMED = 4,
- GIT_DELTA_COPIED = 5,
- GIT_DELTA_IGNORED = 6,
- GIT_DELTA_UNTRACKED = 7
+ GIT_DELTA_UNMODIFIED = 0, /**< no changes */
+ GIT_DELTA_ADDED = 1, /**< entry does not exist in old version */
+ GIT_DELTA_DELETED = 2, /**< entry does not exist in new version */
+ GIT_DELTA_MODIFIED = 3, /**< entry content changed between old and new */
+ GIT_DELTA_RENAMED = 4, /**< entry was renamed between old and new */
+ GIT_DELTA_COPIED = 5, /**< entry was copied from another old entry */
+ GIT_DELTA_IGNORED = 6, /**< entry is ignored item in workdir */
+ GIT_DELTA_UNTRACKED = 7, /**< entry is untracked item in workdir */
+ GIT_DELTA_TYPECHANGE = 8, /**< type of entry changed between old and new */
} git_delta_t;
/**
- * Description of one side of a diff.
+ * Description of one side of a diff entry.
+ *
+ * Although this is called a "file", it may actually represent a file, a
+ * symbolic link, a submodule commit id, or even a tree (although that only
+ * if you are tracking type changes or ignored/untracked directories).
+ *
+ * The `oid` is the `git_oid` of the item. If the entry represents an
+ * absent side of a diff (e.g. the `old_file` of a `GIT_DELTA_ADDED` delta),
+ * then the oid will be zeroes.
+ *
+ * `path` is the NUL-terminated path to the entry relative to the working
+ * directory of the repository.
+ *
+ * `size` is the size of the entry in bytes.
+ *
+ * `flags` is a combination of the `git_diff_flag_t` types
+ *
+ * `mode` is, roughly, the stat() `st_mode` value for the item. This will
+ * be restricted to one of the `git_filemode_t` values.
*/
typedef struct {
- git_oid oid;
- char *path;
- uint16_t mode;
- git_off_t size;
- unsigned int flags;
+ git_oid oid;
+ const char *path;
+ git_off_t size;
+ uint32_t flags;
+ uint16_t mode;
} git_diff_file;
/**
- * Description of changes to one file.
+ * Description of changes to one entry.
*
- * When iterating over a diff list object, this will generally be passed to
- * most callback functions and you can use the contents to understand
- * exactly what has changed.
+ * When iterating over a diff list object, this will be passed to most
+ * callback functions and you can use the contents to understand exactly
+ * what has changed.
*
- * Under some circumstances, not all fields will be filled in, but the code
- * generally tries to fill in as much as possible. One example is that the
- * "binary" field will not actually look at file contents if you do not
- * pass in hunk and/or line callbacks to the diff foreach iteration function.
- * It will just use the git attributes for those files.
+ * The `old_file` represents the "from" side of the diff and the `new_file`
+ * represents the "to" side of the diff. What those mean depends on the
+ * function that was used to generate the diff and will be documented below.
+ * You can also use the `GIT_DIFF_REVERSE` flag to flip it around.
+ *
+ * Although the two sides of the delta are named "old_file" and "new_file",
+ * they actually may correspond to entries that represent a file, a symbolic
+ * link, a submodule commit id, or even a tree (if you are tracking type
+ * changes or ignored/untracked directories).
+ *
+ * Under some circumstances, in the name of efficiency, not all fields will
+ * be filled in, but we generally try to fill in as much as possible. One
+ * example is that the "flags" field may not have either the `BINARY` or the
+ * `NOT_BINARY` flag set to avoid examining file contents if you do not pass
+ * in hunk and/or line callbacks to the diff foreach iteration function. It
+ * will just use the git attributes for those files.
*/
typedef struct {
git_diff_file old_file;
git_diff_file new_file;
git_delta_t status;
- unsigned int similarity; /**< for RENAMED and COPIED, value 0-100 */
- int binary;
+ uint32_t similarity; /**< for RENAMED and COPIED, value 0-100 */
+ uint32_t flags;
} git_diff_delta;
/**
+ * Diff notification callback function.
+ *
+ * The callback will be called for each file, just before the `git_delta_t`
+ * gets inserted into the diff list.
+ *
+ * When the callback:
+ * - returns < 0, the diff process will be aborted.
+ * - returns > 0, the delta will not be inserted into the diff list, but the
+ * diff process continues.
+ * - returns 0, the delta is inserted into the diff list, and the diff process
+ * continues.
+ */
+typedef int (*git_diff_notify_cb)(
+ const git_diff_list *diff_so_far,
+ const git_diff_delta *delta_to_add,
+ const char *matched_pathspec,
+ void *payload);
+
+/**
+ * Structure describing options about how the diff should be executed.
+ *
+ * Setting all values of the structure to zero will yield the default
+ * values. Similarly, passing NULL for the options structure will
+ * give the defaults. The default values are marked below.
+ *
+ * - `flags` is a combination of the `git_diff_option_t` values above
+ * - `context_lines` is the number of unchanged lines that define the
+ * boundary of a hunk (and to display before and after)
+ * - `interhunk_lines` is the maximum number of unchanged lines between
+ * hunk boundaries before the hunks will be merged into one.
+ * - `old_prefix` is the virtual "directory" to prefix to old file names
+ * in hunk headers (default "a")
+ * - `new_prefix` is the virtual "directory" to prefix to new file names
+ * in hunk headers (default "b")
+ * - `pathspec` is an array of paths / fnmatch patterns to constrain diff
+ * - `max_size` is a file size (in bytes) above which a blob will be marked
+ * as binary automatically; pass a negative value to disable.
+ * - `notify_cb` is an optional callback function, notifying the consumer of
+ * which files are being examined as the diff is generated
+ * - `notify_payload` is the payload data to pass to the `notify_cb` function
+ */
+typedef struct {
+ unsigned int version; /**< version for the struct */
+ uint32_t flags; /**< defaults to GIT_DIFF_NORMAL */
+ uint16_t context_lines; /**< defaults to 3 */
+ uint16_t interhunk_lines; /**< defaults to 0 */
+ const char *old_prefix; /**< defaults to "a" */
+ const char *new_prefix; /**< defaults to "b" */
+ git_strarray pathspec; /**< defaults to include all paths */
+ git_off_t max_size; /**< defaults to 512MB */
+ git_diff_notify_cb notify_cb;
+ void *notify_payload;
+} git_diff_options;
+
+#define GIT_DIFF_OPTIONS_VERSION 1
+#define GIT_DIFF_OPTIONS_INIT {GIT_DIFF_OPTIONS_VERSION, GIT_DIFF_NORMAL, 3}
+
+/**
* When iterating over a diff, callback that will be made per file.
+ *
+ * @param delta A pointer to the delta data for the file
+ * @param progress Goes from 0 to 1 over the diff list
+ * @param payload User-specified pointer from foreach function
*/
-typedef int (*git_diff_file_fn)(
- void *cb_data,
- git_diff_delta *delta,
- float progress);
+typedef int (*git_diff_file_cb)(
+ const git_diff_delta *delta,
+ float progress,
+ void *payload);
/**
* Structure describing a hunk of a diff.
*/
typedef struct {
- int old_start;
- int old_lines;
- int new_start;
- int new_lines;
+ int old_start; /**< Starting line number in old_file */
+ int old_lines; /**< Number of lines in old_file */
+ int new_start; /**< Starting line number in new_file */
+ int new_lines; /**< Number of lines in new_file */
} git_diff_range;
/**
* When iterating over a diff, callback that will be made per hunk.
*/
-typedef int (*git_diff_hunk_fn)(
- void *cb_data,
- git_diff_delta *delta,
- git_diff_range *range,
+typedef int (*git_diff_hunk_cb)(
+ const git_diff_delta *delta,
+ const git_diff_range *range,
const char *header,
- size_t header_len);
+ size_t header_len,
+ void *payload);
/**
* Line origin constants.
*
* These values describe where a line came from and will be passed to
- * the git_diff_data_fn when iterating over a diff. There are some
- * special origin contants at the end that are used for the text
+ * the git_diff_data_cb when iterating over a diff. There are some
+ * special origin constants at the end that are used for the text
* output callbacks to demarcate lines that are actually part of
* the file or hunk headers.
*/
-enum {
- /* these values will be sent to `git_diff_data_fn` along with the line */
+typedef enum {
+ /* These values will be sent to `git_diff_data_cb` along with the line */
GIT_DIFF_LINE_CONTEXT = ' ',
GIT_DIFF_LINE_ADDITION = '+',
GIT_DIFF_LINE_DELETION = '-',
- GIT_DIFF_LINE_ADD_EOFNL = '\n', /**< LF was added at end of file */
+ GIT_DIFF_LINE_ADD_EOFNL = '\n', /**< Removed line w/o LF & added one with */
GIT_DIFF_LINE_DEL_EOFNL = '\0', /**< LF was removed at end of file */
- /* these values will only be sent to a `git_diff_data_fn` when the content
- * of a diff is being formatted (eg. through git_diff_print_patch() or
- * git_diff_print_compact(), for instance).
+
+ /* The following values will only be sent to a `git_diff_data_cb` when
+ * the content of a diff is being formatted (eg. through
+ * git_diff_print_patch() or git_diff_print_compact(), for instance).
*/
GIT_DIFF_LINE_FILE_HDR = 'F',
GIT_DIFF_LINE_HUNK_HDR = 'H',
GIT_DIFF_LINE_BINARY = 'B'
-};
+} git_diff_line_t;
/**
* When iterating over a diff, callback that will be made per text diff
@@ -183,13 +357,108 @@ enum {
* of text. This uses some extra GIT_DIFF_LINE_... constants for output
* of lines of file and hunk headers.
*/
-typedef int (*git_diff_data_fn)(
- void *cb_data,
- git_diff_delta *delta,
- git_diff_range *range,
- char line_origin, /**< GIT_DIFF_LINE_... value from above */
- const char *content,
- size_t content_len);
+typedef int (*git_diff_data_cb)(
+ const git_diff_delta *delta, /** delta that contains this data */
+ const git_diff_range *range, /** range of lines containing this data */
+ char line_origin, /** git_diff_line_t value from above */
+ const char *content, /** diff data - not NUL terminated */
+ size_t content_len, /** number of bytes of diff data */
+ void *payload); /** user reference data */
+
+/**
+ * The diff patch is used to store all the text diffs for a delta.
+ *
+ * You can easily loop over the content of patches and get information about
+ * them.
+ */
+typedef struct git_diff_patch git_diff_patch;
+
+/**
+ * Flags to control the behavior of diff rename/copy detection.
+ */
+typedef enum {
+ /** look for renames? (`--find-renames`) */
+ GIT_DIFF_FIND_RENAMES = (1 << 0),
+ /** consider old side of modified for renames? (`--break-rewrites=N`) */
+ GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1 << 1),
+
+ /** look for copies? (a la `--find-copies`) */
+ GIT_DIFF_FIND_COPIES = (1 << 2),
+ /** consider unmodified as copy sources? (`--find-copies-harder`) */
+ GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1 << 3),
+
+ /** split large rewrites into delete/add pairs (`--break-rewrites=/M`) */
+ GIT_DIFF_FIND_AND_BREAK_REWRITES = (1 << 4),
+
+ /** turn on all finding features */
+ GIT_DIFF_FIND_ALL = (0x1f),
+
+ /** measure similarity ignoring leading whitespace (default) */
+ GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
+ /** measure similarity ignoring all whitespace */
+ GIT_DIFF_FIND_IGNORE_WHITESPACE = (1 << 6),
+ /** measure similarity including all data */
+ GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1 << 7),
+} git_diff_find_t;
+
+/**
+ * Pluggable similarity metric
+ */
+typedef struct {
+ int (*file_signature)(
+ void **out, const git_diff_file *file,
+ const char *fullpath, void *payload);
+ int (*buffer_signature)(
+ void **out, const git_diff_file *file,
+ const char *buf, size_t buflen, void *payload);
+ void (*free_signature)(void *sig, void *payload);
+ int (*similarity)(int *score, void *siga, void *sigb, void *payload);
+ void *payload;
+} git_diff_similarity_metric;
+
+/**
+ * Control behavior of rename and copy detection
+ *
+ * These options mostly mimic parameters that can be passed to git-diff.
+ *
+ * - `rename_threshold` is the same as the -M option with a value
+ * - `copy_threshold` is the same as the -C option with a value
+ * - `rename_from_rewrite_threshold` matches the top of the -B option
+ * - `break_rewrite_threshold` matches the bottom of the -B option
+ * - `target_limit` matches the -l option
+ *
+ * The `metric` option allows you to plug in a custom similarity metric.
+ * Set it to NULL for the default internal metric which is based on sampling
+ * hashes of ranges of data in the file. The default metric is a pretty
+ * good similarity approximation that should work fairly well for both text
+ * and binary data, and is pretty fast with fixed memory overhead.
+ */
+typedef struct {
+ unsigned int version;
+
+ /** Combination of git_diff_find_t values (default FIND_RENAMES) */
+ unsigned int flags;
+
+ /** Similarity to consider a file renamed (default 50) */
+ unsigned int rename_threshold;
+ /** Similarity of modified to be eligible rename source (default 50) */
+ unsigned int rename_from_rewrite_threshold;
+ /** Similarity to consider a file a copy (default 50) */
+ unsigned int copy_threshold;
+ /** Similarity to split modify into delete/add pair (default 60) */
+ unsigned int break_rewrite_threshold;
+
+ /** Maximum similarity sources to examine (a la diff's `-l` option or
+ * the `diff.renameLimit` config) (default 200)
+ */
+ unsigned int target_limit;
+
+ /** Pluggable similarity metric; pass NULL to use internal metric */
+ git_diff_similarity_metric *metric;
+} git_diff_find_options;
+
+#define GIT_DIFF_FIND_OPTIONS_VERSION 1
+#define GIT_DIFF_FIND_OPTIONS_INIT {GIT_DIFF_FIND_OPTIONS_VERSION}
/** @name Diff List Generator Functions
*
@@ -204,69 +473,104 @@ typedef int (*git_diff_data_fn)(
GIT_EXTERN(void) git_diff_list_free(git_diff_list *diff);
/**
- * Compute a difference between two tree objects.
+ * Create a diff list with the difference between two tree objects.
+ *
+ * This is equivalent to `git diff <old-tree> <new-tree>`
+ *
+ * The first tree will be used for the "old_file" side of the delta and the
+ * second tree will be used for the "new_file" side of the delta.
*
+ * @param diff Output pointer to a git_diff_list pointer to be allocated.
* @param repo The repository containing the trees.
- * @param opts Structure with options to influence diff or NULL for defaults.
* @param old_tree A git_tree object to diff from.
* @param new_tree A git_tree object to diff to.
- * @param diff A pointer to a git_diff_list pointer that will be allocated.
+ * @param opts Structure with options to influence diff or NULL for defaults.
*/
GIT_EXTERN(int) git_diff_tree_to_tree(
+ git_diff_list **diff,
git_repository *repo,
- const git_diff_options *opts, /**< can be NULL for defaults */
git_tree *old_tree,
git_tree *new_tree,
- git_diff_list **diff);
+ const git_diff_options *opts); /**< can be NULL for defaults */
/**
- * Compute a difference between a tree and the index.
+ * Create a diff list between a tree and repository index.
*
+ * This is equivalent to `git diff --cached <treeish>` or if you pass
+ * the HEAD tree, then like `git diff --cached`.
+ *
+ * The tree you pass will be used for the "old_file" side of the delta, and
+ * the index will be used for the "new_file" side of the delta.
+ *
+ * @param diff Output pointer to a git_diff_list pointer to be allocated.
* @param repo The repository containing the tree and index.
- * @param opts Structure with options to influence diff or NULL for defaults.
* @param old_tree A git_tree object to diff from.
- * @param diff A pointer to a git_diff_list pointer that will be allocated.
+ * @param index The index to diff with; repo index used if NULL.
+ * @param opts Structure with options to influence diff or NULL for defaults.
*/
-GIT_EXTERN(int) git_diff_index_to_tree(
+GIT_EXTERN(int) git_diff_tree_to_index(
+ git_diff_list **diff,
git_repository *repo,
- const git_diff_options *opts, /**< can be NULL for defaults */
git_tree *old_tree,
- git_diff_list **diff);
+ git_index *index,
+ const git_diff_options *opts); /**< can be NULL for defaults */
/**
- * Compute a difference between the working directory and the index.
+ * Create a diff list between the repository index and the working directory.
*
+ * This matches the `git diff` command. See the note below on
+ * `git_diff_tree_to_workdir` for a discussion of the difference between
+ * `git diff` and `git diff HEAD` and how to emulate a `git diff <treeish>`
+ * using libgit2.
+ *
+ * The index will be used for the "old_file" side of the delta, and the
+ * working directory will be used for the "new_file" side of the delta.
+ *
+ * @param diff Output pointer to a git_diff_list pointer to be allocated.
* @param repo The repository.
+ * @param index The index to diff from; repo index used if NULL.
* @param opts Structure with options to influence diff or NULL for defaults.
- * @param diff A pointer to a git_diff_list pointer that will be allocated.
*/
-GIT_EXTERN(int) git_diff_workdir_to_index(
+GIT_EXTERN(int) git_diff_index_to_workdir(
+ git_diff_list **diff,
git_repository *repo,
- const git_diff_options *opts, /**< can be NULL for defaults */
- git_diff_list **diff);
+ git_index *index,
+ const git_diff_options *opts); /**< can be NULL for defaults */
/**
- * Compute a difference between the working directory and a tree.
+ * Create a diff list between a tree and the working directory.
+ *
+ * The tree you provide will be used for the "old_file" side of the delta,
+ * and the working directory will be used for the "new_file" side.
+ *
+ * Please note: this is *NOT* the same as `git diff <treeish>`. Running
+ * `git diff HEAD` or the like actually uses information from the index,
+ * along with the tree and working directory info.
+ *
+ * This function returns strictly the differences between the tree and the
+ * files contained in the working directory, regardless of the state of
+ * files in the index. It may come as a surprise, but there is no direct
+ * equivalent in core git.
*
- * This returns strictly the differences between the tree and the
- * files contained in the working directory, regardless of the state
- * of files in the index. There is no direct equivalent in C git.
+ * To emulate `git diff <treeish>`, call both `git_diff_tree_to_index` and
+ * `git_diff_index_to_workdir`, then call `git_diff_merge` on the results.
+ * That will yield a `git_diff_list` that matches the git output.
*
- * This is *NOT* the same as 'git diff HEAD' or 'git diff <SHA>'. Those
- * commands diff the tree, the index, and the workdir. To emulate those
- * functions, call `git_diff_index_to_tree` and `git_diff_workdir_to_index`,
- * then call `git_diff_merge` on the results.
+ * If this seems confusing, take the case of a file with a staged deletion
+ * where the file has then been put back into the working dir and modified.
+ * The tree-to-workdir diff for that file is 'modified', but core git would
+ * show status 'deleted' since there is a pending deletion in the index.
*
+ * @param diff A pointer to a git_diff_list pointer that will be allocated.
* @param repo The repository containing the tree.
- * @param opts Structure with options to influence diff or NULL for defaults.
* @param old_tree A git_tree object to diff from.
- * @param diff A pointer to a git_diff_list pointer that will be allocated.
+ * @param opts Structure with options to influence diff or NULL for defaults.
*/
-GIT_EXTERN(int) git_diff_workdir_to_tree(
+GIT_EXTERN(int) git_diff_tree_to_workdir(
+ git_diff_list **diff,
git_repository *repo,
- const git_diff_options *opts, /**< can be NULL for defaults */
git_tree *old_tree,
- git_diff_list **diff);
+ const git_diff_options *opts); /**< can be NULL for defaults */
/**
* Merge one diff list into another.
@@ -285,6 +589,22 @@ GIT_EXTERN(int) git_diff_merge(
git_diff_list *onto,
const git_diff_list *from);
+/**
+ * Transform a diff list marking file renames, copies, etc.
+ *
+ * This modifies a diff list in place, replacing old entries that look
+ * like renames or copies with new entries reflecting those changes.
+ * This also will, if requested, break modified files into add/remove
+ * pairs if the amount of change is above a threshold.
+ *
+ * @param diff Diff list to run detection algorithms on
+ * @param options Control how detection should be run, NULL for defaults
+ * @return 0 on success, -1 on failure
+ */
+GIT_EXTERN(int) git_diff_find_similar(
+ git_diff_list *diff,
+ git_diff_find_options *options);
+
/**@}*/
@@ -296,37 +616,273 @@ GIT_EXTERN(int) git_diff_merge(
/**@{*/
/**
- * Iterate over a diff list issuing callbacks.
+ * Loop over all deltas in a diff list issuing callbacks.
+ *
+ * This will iterate through all of the files described in a diff. You
+ * should provide a file callback to learn about each file.
+ *
+ * The "hunk" and "line" callbacks are optional, and the text diff of the
+ * files will only be calculated if they are not NULL. Of course, these
+ * callbacks will not be invoked for binary files on the diff list or for
+ * files whose only change is a file mode change.
*
- * If the hunk and/or line callbacks are not NULL, then this will calculate
- * text diffs for all files it thinks are not binary. If those are both
- * NULL, then this will not bother with the text diffs, so it can be
- * efficient.
+ * Returning a non-zero value from any of the callbacks will terminate
+ * the iteration and cause this to return `GIT_EUSER`.
+ *
+ * @param diff A git_diff_list generated by one of the above functions.
+ * @param file_cb Callback function to make per file in the diff.
+ * @param hunk_cb Optional callback to make per hunk of text diff. This
+ * callback is called to describe a range of lines in the
+ * diff. It will not be issued for binary files.
+ * @param line_cb Optional callback to make per line of diff text. This
+ * same callback will be made for context lines, added, and
+ * removed lines, and even for a deleted trailing newline.
+ * @param payload Reference pointer that will be passed to your callbacks.
+ * @return 0 on success, GIT_EUSER on non-zero callback, or error code
*/
GIT_EXTERN(int) git_diff_foreach(
git_diff_list *diff,
- void *cb_data,
- git_diff_file_fn file_cb,
- git_diff_hunk_fn hunk_cb,
- git_diff_data_fn line_cb);
+ git_diff_file_cb file_cb,
+ git_diff_hunk_cb hunk_cb,
+ git_diff_data_cb line_cb,
+ void *payload);
/**
* Iterate over a diff generating text output like "git diff --name-status".
+ *
+ * Returning a non-zero value from the callbacks will terminate the
+ * iteration and cause this to return `GIT_EUSER`.
+ *
+ * @param diff A git_diff_list generated by one of the above functions.
+ * @param print_cb Callback to make per line of diff text.
+ * @param payload Reference pointer that will be passed to your callback.
+ * @return 0 on success, GIT_EUSER on non-zero callback, or error code
*/
GIT_EXTERN(int) git_diff_print_compact(
git_diff_list *diff,
- void *cb_data,
- git_diff_data_fn print_cb);
+ git_diff_data_cb print_cb,
+ void *payload);
+
+/**
+ * Look up the single character abbreviation for a delta status code.
+ *
+ * When you call `git_diff_print_compact` it prints single letter codes into
+ * the output such as 'A' for added, 'D' for deleted, 'M' for modified, etc.
+ * It is sometimes convenient to convert a git_delta_t value into these
+ * letters for your own purposes. This function does just that. By the
+ * way, unmodified will return a space (i.e. ' ').
+ *
+ * @param status The git_delta_t value to look up
+ * @return The single character label for that code
+ */
+GIT_EXTERN(char) git_diff_status_char(git_delta_t status);
/**
* Iterate over a diff generating text output like "git diff".
*
* This is a super easy way to generate a patch from a diff.
+ *
+ * Returning a non-zero value from the callbacks will terminate the
+ * iteration and cause this to return `GIT_EUSER`.
+ *
+ * @param diff A git_diff_list generated by one of the above functions.
+ * @param payload Reference pointer that will be passed to your callbacks.
+ * @param print_cb Callback function to output lines of the diff. This
+ * same function will be called for file headers, hunk
+ * headers, and diff lines. Fortunately, you can probably
+ * use various GIT_DIFF_LINE constants to determine what
+ * text you are given.
+ * @return 0 on success, GIT_EUSER on non-zero callback, or error code
*/
GIT_EXTERN(int) git_diff_print_patch(
git_diff_list *diff,
- void *cb_data,
- git_diff_data_fn print_cb);
+ git_diff_data_cb print_cb,
+ void *payload);
+
+/**
+ * Query how many diff records there are in a diff list.
+ *
+ * @param diff A git_diff_list generated by one of the above functions
+ * @return Count of number of deltas in the list
+ */
+GIT_EXTERN(size_t) git_diff_num_deltas(git_diff_list *diff);
+
+/**
+ * Query how many diff deltas there are in a diff list filtered by type.
+ *
+ * This works just like `git_diff_num_deltas()` with an extra parameter
+ * that is a `git_delta_t` and returns just the count of how many deltas
+ * match that particular type.
+ *
+ * @param diff A git_diff_list generated by one of the above functions
+ * @param type A git_delta_t value to filter the count
+ * @return Count of number of deltas matching delta_t type
+ */
+GIT_EXTERN(size_t) git_diff_num_deltas_of_type(
+ git_diff_list *diff,
+ git_delta_t type);
+
+/**
+ * Return the diff delta and patch for an entry in the diff list.
+ *
+ * The `git_diff_patch` is a newly created object that contains the text
+ * diffs for the delta. You have to call `git_diff_patch_free()` when you
+ * are done with it. You can use the patch object to loop over all the
+ * hunks and lines in the diff of the one delta.
+ *
+ * For an unchanged file or a binary file, no `git_diff_patch` will be
+ * created, the output will be set to NULL, and the `binary` flag will be
+ * set true in the `git_diff_delta` structure.
+ *
+ * The `git_diff_delta` pointer points to internal data and you do not have
+ * to release it when you are done with it. It will go away when the
+ * `git_diff_list` and `git_diff_patch` go away.
+ *
+ * It is okay to pass NULL for either of the output parameters; if you pass
+ * NULL for the `git_diff_patch`, then the text diff will not be calculated.
+ *
+ * @param patch_out Output parameter for the delta patch object
+ * @param delta_out Output parameter for the delta object
+ * @param diff Diff list object
+ * @param idx Index into diff list
+ * @return 0 on success, other value < 0 on error
+ */
+GIT_EXTERN(int) git_diff_get_patch(
+ git_diff_patch **patch_out,
+ const git_diff_delta **delta_out,
+ git_diff_list *diff,
+ size_t idx);
+
+/**
+ * Free a git_diff_patch object.
+ */
+GIT_EXTERN(void) git_diff_patch_free(
+ git_diff_patch *patch);
+
+/**
+ * Get the delta associated with a patch
+ */
+GIT_EXTERN(const git_diff_delta *) git_diff_patch_delta(
+ git_diff_patch *patch);
+
+/**
+ * Get the number of hunks in a patch
+ */
+GIT_EXTERN(size_t) git_diff_patch_num_hunks(
+ git_diff_patch *patch);
+
+/**
+ * Get line counts of each type in a patch.
+ *
+ * This helps imitate a diff --numstat type of output. For that purpose,
+ * you only need the `total_additions` and `total_deletions` values, but we
+ * include the `total_context` line count in case you want the total number
+ * of lines of diff output that will be generated.
+ *
+ * All outputs are optional. Pass NULL if you don't need a particular count.
+ *
+ * @param total_context Count of context lines in output, can be NULL.
+ * @param total_additions Count of addition lines in output, can be NULL.
+ * @param total_deletions Count of deletion lines in output, can be NULL.
+ * @param patch The git_diff_patch object
+ * @return 0 on success, <0 on error
+ */
+GIT_EXTERN(int) git_diff_patch_line_stats(
+ size_t *total_context,
+ size_t *total_additions,
+ size_t *total_deletions,
+ const git_diff_patch *patch);
+
+/**
+ * Get the information about a hunk in a patch
+ *
+ * Given a patch and a hunk index into the patch, this returns detailed
+ * information about that hunk. Any of the output pointers can be passed
+ * as NULL if you don't care about that particular piece of information.
+ *
+ * @param range Output pointer to git_diff_range of hunk
+ * @param header Output pointer to header string for hunk. Unlike the
+ * content pointer for each line, this will be NUL-terminated
+ * @param header_len Output value of characters in header string
+ * @param lines_in_hunk Output count of total lines in this hunk
+ * @param patch Input pointer to patch object
+ * @param hunk_idx Input index of hunk to get information about
+ * @return 0 on success, GIT_ENOTFOUND if hunk_idx out of range, <0 on error
+ */
+GIT_EXTERN(int) git_diff_patch_get_hunk(
+ const git_diff_range **range,
+ const char **header,
+ size_t *header_len,
+ size_t *lines_in_hunk,
+ git_diff_patch *patch,
+ size_t hunk_idx);
+
+/**
+ * Get the number of lines in a hunk.
+ *
+ * @param patch The git_diff_patch object
+ * @param hunk_idx Index of the hunk
+ * @return Number of lines in hunk or -1 if invalid hunk index
+ */
+GIT_EXTERN(int) git_diff_patch_num_lines_in_hunk(
+ git_diff_patch *patch,
+ size_t hunk_idx);
+
+/**
+ * Get data about a line in a hunk of a patch.
+ *
+ * Given a patch, a hunk index, and a line index in the hunk, this
+ * will return a lot of details about that line. If you pass a hunk
+ * index larger than the number of hunks or a line index larger than
+ * the number of lines in the hunk, this will return -1.
+ *
+ * @param line_origin A GIT_DIFF_LINE constant from above
+ * @param content Pointer to content of diff line, not NUL-terminated
+ * @param content_len Number of characters in content
+ * @param old_lineno Line number in old file or -1 if line is added
+ * @param new_lineno Line number in new file or -1 if line is deleted
+ * @param patch The patch to look in
+ * @param hunk_idx The index of the hunk
+ * @param line_of_hunk The index of the line in the hunk
+ * @return 0 on success, <0 on failure
+ */
+GIT_EXTERN(int) git_diff_patch_get_line_in_hunk(
+ char *line_origin,
+ const char **content,
+ size_t *content_len,
+ int *old_lineno,
+ int *new_lineno,
+ git_diff_patch *patch,
+ size_t hunk_idx,
+ size_t line_of_hunk);
+
+/**
+ * Serialize the patch to text via callback.
+ *
+ * Returning a non-zero value from the callback will terminate the iteration
+ * and cause this to return `GIT_EUSER`.
+ *
+ * @param patch A git_diff_patch representing changes to one file
+ * @param print_cb Callback function to output lines of the patch. Will be
+ * called for file headers, hunk headers, and diff lines.
+ * @param payload Reference pointer that will be passed to your callbacks.
+ * @return 0 on success, GIT_EUSER on non-zero callback, or error code
+ */
+GIT_EXTERN(int) git_diff_patch_print(
+ git_diff_patch *patch,
+ git_diff_data_cb print_cb,
+ void *payload);
+
+/**
+ * Get the content of a patch as a single diff text.
+ *
+ * @param string Allocated string; caller must free.
+ * @param patch A git_diff_patch representing changes to one file
+ * @return 0 on success, <0 on failure.
+ */
+GIT_EXTERN(int) git_diff_patch_to_str(
+ char **string,
+ git_diff_patch *patch);
/**@}*/
@@ -336,24 +892,55 @@ GIT_EXTERN(int) git_diff_print_patch(
*/
/**
- * Directly run a text diff on two blobs.
+ * Directly run a diff on two blobs.
*
- * Compared to a file, a blob lacks some contextual information. As such, the
- * `git_diff_file` parameters of the callbacks will be filled accordingly to the following:
- * `mode` will be set to 0, `path` will be set to NULL. When dealing with a NULL blob, `oid`
- * will be set to 0.
+ * Compared to a file, a blob lacks some contextual information. As such,
+ * the `git_diff_file` given to the callback will have some fake data; i.e.
+ * `mode` will be 0 and `path` will be NULL.
*
- * When at least one of the blobs being dealt with is binary, the `git_diff_delta` binary
- * attribute will be set to 1 and no call to the hunk_cb nor line_cb will be made.
+ * NULL is allowed for either `old_blob` or `new_blob` and will be treated
+ * as an empty blob, with the `oid` set to NULL in the `git_diff_file` data.
+ * Passing NULL for both blobs is a noop; no callbacks will be made at all.
+ *
+ * We do run a binary content check on the blob content and if either blob
+ * looks like binary data, the `git_diff_delta` binary attribute will be set
+ * to 1 and no call to the hunk_cb nor line_cb will be made (unless you pass
+ * `GIT_DIFF_FORCE_TEXT` of course).
+ *
+ * @return 0 on success, GIT_EUSER on non-zero callback, or error code
*/
GIT_EXTERN(int) git_diff_blobs(
- git_blob *old_blob,
- git_blob *new_blob,
- git_diff_options *options,
- void *cb_data,
- git_diff_file_fn file_cb,
- git_diff_hunk_fn hunk_cb,
- git_diff_data_fn line_cb);
+ const git_blob *old_blob,
+ const git_blob *new_blob,
+ const git_diff_options *options,
+ git_diff_file_cb file_cb,
+ git_diff_hunk_cb hunk_cb,
+ git_diff_data_cb line_cb,
+ void *payload);
+
+/**
+ * Directly run a diff between a blob and a buffer.
+ *
+ * As with `git_diff_blobs`, comparing a blob and buffer lacks some context,
+ * so the `git_diff_file` parameters to the callbacks will be faked a la the
+ * rules for `git_diff_blobs()`.
+ *
+ * Passing NULL for `old_blob` will be treated as an empty blob (i.e. the
+ * `file_cb` will be invoked with GIT_DELTA_ADDED and the diff will be the
+ * entire content of the buffer added). Passing NULL to the buffer will do
+ * the reverse, with GIT_DELTA_DELETED and blob content removed.
+ *
+ * @return 0 on success, GIT_EUSER on non-zero callback, or error code
+ */
+GIT_EXTERN(int) git_diff_blob_to_buffer(
+ const git_blob *old_blob,
+ const char *buffer,
+ size_t buffer_len,
+ const git_diff_options *options,
+ git_diff_file_cb file_cb,
+ git_diff_hunk_cb hunk_cb,
+ git_diff_data_cb data_cb,
+ void *payload);
GIT_END_DECL