diff options
author | peter <peter@FreeBSD.org> | 2013-06-18 02:07:41 +0000 |
---|---|---|
committer | peter <peter@FreeBSD.org> | 2013-06-18 02:07:41 +0000 |
commit | d25dac7fcc6acc838b71bbda8916fd9665c709ab (patch) | |
tree | 135691142dc0e75a5e5d97b5074d03436435b8e0 /subversion/libsvn_diff | |
download | FreeBSD-src-d25dac7fcc6acc838b71bbda8916fd9665c709ab.zip FreeBSD-src-d25dac7fcc6acc838b71bbda8916fd9665c709ab.tar.gz |
Import trimmed svn-1.8.0-rc3
Diffstat (limited to 'subversion/libsvn_diff')
-rw-r--r-- | subversion/libsvn_diff/deprecated.c | 289 | ||||
-rw-r--r-- | subversion/libsvn_diff/diff.c | 199 | ||||
-rw-r--r-- | subversion/libsvn_diff/diff.h | 217 | ||||
-rw-r--r-- | subversion/libsvn_diff/diff3.c | 529 | ||||
-rw-r--r-- | subversion/libsvn_diff/diff4.c | 314 | ||||
-rw-r--r-- | subversion/libsvn_diff/diff_file.c | 2414 | ||||
-rw-r--r-- | subversion/libsvn_diff/diff_memory.c | 1161 | ||||
-rw-r--r-- | subversion/libsvn_diff/diff_tree.c | 1705 | ||||
-rw-r--r-- | subversion/libsvn_diff/lcs.c | 375 | ||||
-rw-r--r-- | subversion/libsvn_diff/parse-diff.c | 1373 | ||||
-rw-r--r-- | subversion/libsvn_diff/token.c | 198 | ||||
-rw-r--r-- | subversion/libsvn_diff/util.c | 591 |
12 files changed, 9365 insertions(+), 0 deletions(-)
diff --git a/subversion/libsvn_diff/deprecated.c b/subversion/libsvn_diff/deprecated.c new file mode 100644 index 0000000..891ad5f --- /dev/null +++ b/subversion/libsvn_diff/deprecated.c @@ -0,0 +1,289 @@ +/* + * deprecated.c: holding file for all deprecated APIs. + * "we can't lose 'em, but we can shun 'em!" + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +/* ==================================================================== */ + + + +/*** Includes. ***/ + +/* We define this here to remove any further warnings about the usage of + deprecated functions in this file. */ +#define SVN_DEPRECATED + +#include "svn_diff.h" +#include "svn_utf.h" + +#include "svn_private_config.h" + + + + +/*** Code. ***/ +struct fns_wrapper_baton +{ + /* We put the old baton in front of this one, so that we can still use + this baton in place of the old. This prevents us from having to + implement simple wrappers around each member of diff_fns_t. 
*/ + void *old_baton; + const svn_diff_fns_t *vtable; +}; + +static svn_error_t * +datasources_open(void *baton, + apr_off_t *prefix_lines, + apr_off_t *suffix_lines, + const svn_diff_datasource_e *datasources, + apr_size_t datasource_len) +{ + struct fns_wrapper_baton *fwb = baton; + apr_size_t i; + + /* Just iterate over the datasources, using the old singular version. */ + for (i = 0; i < datasource_len; i++) + { + SVN_ERR(fwb->vtable->datasource_open(fwb->old_baton, datasources[i])); + } + + /* Don't claim any prefix or suffix matches. */ + *prefix_lines = 0; + *suffix_lines = 0; + + return SVN_NO_ERROR; +} + +static svn_error_t * +datasource_close(void *baton, + svn_diff_datasource_e datasource) +{ + struct fns_wrapper_baton *fwb = baton; + return fwb->vtable->datasource_close(fwb->old_baton, datasource); +} + +static svn_error_t * +datasource_get_next_token(apr_uint32_t *hash, + void **token, + void *baton, + svn_diff_datasource_e datasource) +{ + struct fns_wrapper_baton *fwb = baton; + return fwb->vtable->datasource_get_next_token(hash, token, fwb->old_baton, + datasource); +} + +static svn_error_t * +token_compare(void *baton, + void *ltoken, + void *rtoken, + int *compare) +{ + struct fns_wrapper_baton *fwb = baton; + return fwb->vtable->token_compare(fwb->old_baton, ltoken, rtoken, compare); +} + +static void +token_discard(void *baton, + void *token) +{ + struct fns_wrapper_baton *fwb = baton; + fwb->vtable->token_discard(fwb->old_baton, token); +} + +static void +token_discard_all(void *baton) +{ + struct fns_wrapper_baton *fwb = baton; + fwb->vtable->token_discard_all(fwb->old_baton); +} + + +static void +wrap_diff_fns(svn_diff_fns2_t **diff_fns2, + struct fns_wrapper_baton **baton2, + const svn_diff_fns_t *diff_fns, + void *baton, + apr_pool_t *result_pool) +{ + /* Initialize the return vtable. 
*/ + *diff_fns2 = apr_palloc(result_pool, sizeof(**diff_fns2)); + + (*diff_fns2)->datasources_open = datasources_open; + (*diff_fns2)->datasource_close = datasource_close; + (*diff_fns2)->datasource_get_next_token = datasource_get_next_token; + (*diff_fns2)->token_compare = token_compare; + (*diff_fns2)->token_discard = token_discard; + (*diff_fns2)->token_discard_all = token_discard_all; + + /* Initialize the wrapper baton. */ + *baton2 = apr_palloc(result_pool, sizeof (**baton2)); + (*baton2)->old_baton = baton; + (*baton2)->vtable = diff_fns; +} + + +/*** From diff_file.c ***/ +svn_error_t * +svn_diff_file_output_unified2(svn_stream_t *output_stream, + svn_diff_t *diff, + const char *original_path, + const char *modified_path, + const char *original_header, + const char *modified_header, + const char *header_encoding, + apr_pool_t *pool) +{ + return svn_diff_file_output_unified3(output_stream, diff, + original_path, modified_path, + original_header, modified_header, + header_encoding, NULL, FALSE, pool); +} + +svn_error_t * +svn_diff_file_output_unified(svn_stream_t *output_stream, + svn_diff_t *diff, + const char *original_path, + const char *modified_path, + const char *original_header, + const char *modified_header, + apr_pool_t *pool) +{ + return svn_diff_file_output_unified2(output_stream, diff, + original_path, modified_path, + original_header, modified_header, + SVN_APR_LOCALE_CHARSET, pool); +} + +svn_error_t * +svn_diff_file_diff(svn_diff_t **diff, + const char *original, + const char *modified, + apr_pool_t *pool) +{ + return svn_diff_file_diff_2(diff, original, modified, + svn_diff_file_options_create(pool), pool); +} + +svn_error_t * +svn_diff_file_diff3(svn_diff_t **diff, + const char *original, + const char *modified, + const char *latest, + apr_pool_t *pool) +{ + return svn_diff_file_diff3_2(diff, original, modified, latest, + svn_diff_file_options_create(pool), pool); +} + +svn_error_t * +svn_diff_file_diff4(svn_diff_t **diff, + const char 
*original, + const char *modified, + const char *latest, + const char *ancestor, + apr_pool_t *pool) +{ + return svn_diff_file_diff4_2(diff, original, modified, latest, ancestor, + svn_diff_file_options_create(pool), pool); +} + +svn_error_t * +svn_diff_file_output_merge(svn_stream_t *output_stream, + svn_diff_t *diff, + const char *original_path, + const char *modified_path, + const char *latest_path, + const char *conflict_original, + const char *conflict_modified, + const char *conflict_latest, + const char *conflict_separator, + svn_boolean_t display_original_in_conflict, + svn_boolean_t display_resolved_conflicts, + apr_pool_t *pool) +{ + svn_diff_conflict_display_style_t style = + svn_diff_conflict_display_modified_latest; + + if (display_resolved_conflicts) + style = svn_diff_conflict_display_resolved_modified_latest; + + if (display_original_in_conflict) + style = svn_diff_conflict_display_modified_original_latest; + + return svn_diff_file_output_merge2(output_stream, + diff, + original_path, + modified_path, + latest_path, + conflict_original, + conflict_modified, + conflict_latest, + conflict_separator, + style, + pool); +} + + +/*** From diff.c ***/ +svn_error_t * +svn_diff_diff(svn_diff_t **diff, + void *diff_baton, + const svn_diff_fns_t *vtable, + apr_pool_t *pool) +{ + svn_diff_fns2_t *diff_fns2; + struct fns_wrapper_baton *fwb; + + wrap_diff_fns(&diff_fns2, &fwb, vtable, diff_baton, pool); + return svn_diff_diff_2(diff, fwb, diff_fns2, pool); +} + + +/*** From diff3.c ***/ +svn_error_t * +svn_diff_diff3(svn_diff_t **diff, + void *diff_baton, + const svn_diff_fns_t *vtable, + apr_pool_t *pool) +{ + svn_diff_fns2_t *diff_fns2; + struct fns_wrapper_baton *fwb; + + wrap_diff_fns(&diff_fns2, &fwb, vtable, diff_baton, pool); + return svn_diff_diff3_2(diff, fwb, diff_fns2, pool); +} + + +/*** From diff4.c ***/ +svn_error_t * +svn_diff_diff4(svn_diff_t **diff, + void *diff_baton, + const svn_diff_fns_t *vtable, + apr_pool_t *pool) +{ + svn_diff_fns2_t 
*diff_fns2; + struct fns_wrapper_baton *fwb; + + wrap_diff_fns(&diff_fns2, &fwb, vtable, diff_baton, pool); + return svn_diff_diff4_2(diff, fwb, diff_fns2, pool); +} diff --git a/subversion/libsvn_diff/diff.c b/subversion/libsvn_diff/diff.c new file mode 100644 index 0000000..f43a3be --- /dev/null +++ b/subversion/libsvn_diff/diff.c @@ -0,0 +1,199 @@ +/* + * diff.c : routines for doing diffs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + + +#include <apr.h> +#include <apr_pools.h> +#include <apr_general.h> + +#include "svn_pools.h" +#include "svn_error.h" +#include "svn_diff.h" +#include "svn_types.h" + +#include "diff.h" + + +svn_diff__token_index_t* +svn_diff__get_token_counts(svn_diff__position_t *loop_start, + svn_diff__token_index_t num_tokens, + apr_pool_t *pool) +{ + svn_diff__token_index_t *token_counts; + svn_diff__token_index_t token_index; + svn_diff__position_t *current; + + token_counts = apr_palloc(pool, num_tokens * sizeof(*token_counts)); + for (token_index = 0; token_index < num_tokens; token_index++) + token_counts[token_index] = 0; + + current = loop_start; + if (current != NULL) + { + do + { + token_counts[current->token_index]++; + current = current->next; + } + while (current != loop_start); + } + + return token_counts; +} + + +svn_diff_t * +svn_diff__diff(svn_diff__lcs_t *lcs, + apr_off_t original_start, apr_off_t modified_start, + svn_boolean_t want_common, + apr_pool_t *pool) +{ + svn_diff_t *diff; + svn_diff_t **diff_ref = &diff; + + while (1) + { + if (original_start < lcs->position[0]->offset + || modified_start < lcs->position[1]->offset) + { + (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); + + (*diff_ref)->type = svn_diff__type_diff_modified; + (*diff_ref)->original_start = original_start - 1; + (*diff_ref)->original_length = + lcs->position[0]->offset - original_start; + (*diff_ref)->modified_start = modified_start - 1; + (*diff_ref)->modified_length = + lcs->position[1]->offset - modified_start; + (*diff_ref)->latest_start = 0; + (*diff_ref)->latest_length = 0; + + diff_ref = &(*diff_ref)->next; + } + + /* Detect the EOF */ + if (lcs->length == 0) + break; + + original_start = lcs->position[0]->offset; + modified_start = lcs->position[1]->offset; + + if (want_common) + { + (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); + + (*diff_ref)->type = svn_diff__type_common; + 
(*diff_ref)->original_start = original_start - 1; + (*diff_ref)->original_length = lcs->length; + (*diff_ref)->modified_start = modified_start - 1; + (*diff_ref)->modified_length = lcs->length; + (*diff_ref)->latest_start = 0; + (*diff_ref)->latest_length = 0; + + diff_ref = &(*diff_ref)->next; + } + + original_start += lcs->length; + modified_start += lcs->length; + + lcs = lcs->next; + } + + *diff_ref = NULL; + + return diff; +} + + +svn_error_t * +svn_diff_diff_2(svn_diff_t **diff, + void *diff_baton, + const svn_diff_fns2_t *vtable, + apr_pool_t *pool) +{ + svn_diff__tree_t *tree; + svn_diff__position_t *position_list[2]; + svn_diff__token_index_t num_tokens; + svn_diff__token_index_t *token_counts[2]; + svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, + svn_diff_datasource_modified}; + svn_diff__lcs_t *lcs; + apr_pool_t *subpool; + apr_pool_t *treepool; + apr_off_t prefix_lines = 0; + apr_off_t suffix_lines = 0; + + *diff = NULL; + + subpool = svn_pool_create(pool); + treepool = svn_pool_create(pool); + + svn_diff__tree_create(&tree, treepool); + + SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines, + datasource, 2)); + + /* Insert the data into the tree */ + SVN_ERR(svn_diff__get_tokens(&position_list[0], + tree, + diff_baton, vtable, + svn_diff_datasource_original, + prefix_lines, + subpool)); + + SVN_ERR(svn_diff__get_tokens(&position_list[1], + tree, + diff_baton, vtable, + svn_diff_datasource_modified, + prefix_lines, + subpool)); + + num_tokens = svn_diff__get_node_count(tree); + + /* The cool part is that we don't need the tokens anymore. + * Allow the app to clean them up if it wants to. 
+ */ + if (vtable->token_discard_all != NULL) + vtable->token_discard_all(diff_baton); + + /* We don't need the nodes in the tree either anymore, nor the tree itself */ + svn_pool_destroy(treepool); + + token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens, + subpool); + token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens, + subpool); + + /* Get the lcs */ + lcs = svn_diff__lcs(position_list[0], position_list[1], token_counts[0], + token_counts[1], num_tokens, prefix_lines, + suffix_lines, subpool); + + /* Produce the diff */ + *diff = svn_diff__diff(lcs, 1, 1, TRUE, pool); + + /* Get rid of all the data we don't have a use for anymore */ + svn_pool_destroy(subpool); + + return SVN_NO_ERROR; +} diff --git a/subversion/libsvn_diff/diff.h b/subversion/libsvn_diff/diff.h new file mode 100644 index 0000000..51a84c6 --- /dev/null +++ b/subversion/libsvn_diff/diff.h @@ -0,0 +1,217 @@ +/* + * diff.h : private header file + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + +#if !defined(DIFF_H) +#define DIFF_H + +#include <apr.h> +#include <apr_pools.h> +#include <apr_general.h> + +#include "svn_diff.h" +#include "svn_types.h" + +#define SVN_DIFF__UNIFIED_CONTEXT_SIZE 3 + +typedef struct svn_diff__node_t svn_diff__node_t; +typedef struct svn_diff__tree_t svn_diff__tree_t; +typedef struct svn_diff__position_t svn_diff__position_t; +typedef struct svn_diff__lcs_t svn_diff__lcs_t; + +typedef enum svn_diff__type_e +{ + svn_diff__type_common, + svn_diff__type_diff_modified, + svn_diff__type_diff_latest, + svn_diff__type_diff_common, + svn_diff__type_conflict +} svn_diff__type_e; + +struct svn_diff_t { + svn_diff_t *next; + svn_diff__type_e type; + apr_off_t original_start; + apr_off_t original_length; + apr_off_t modified_start; + apr_off_t modified_length; + apr_off_t latest_start; + apr_off_t latest_length; + svn_diff_t *resolved_diff; +}; + +/* Type used for token indices and counts of tokens. Must be signed. */ +typedef long int svn_diff__token_index_t; + +struct svn_diff__position_t +{ + svn_diff__position_t *next; + svn_diff__token_index_t token_index; + apr_off_t offset; +}; + +struct svn_diff__lcs_t +{ + svn_diff__lcs_t *next; + svn_diff__position_t *position[2]; + apr_off_t length; + int refcount; +}; + + +/* State used when normalizing whitespace and EOL styles. */ +typedef enum svn_diff__normalize_state_t +{ + /* Initial state; not in a sequence of whitespace. */ + svn_diff__normalize_state_normal, + /* We're in a sequence of whitespace characters. Only entered if + we ignore whitespace. */ + svn_diff__normalize_state_whitespace, + /* The previous character was CR. 
*/ + svn_diff__normalize_state_cr +} svn_diff__normalize_state_t; + + +/* + * Calculate the Longest Common Subsequence (LCS) between two datasources + * POSITION_LIST1 and POSITION_LIST2, with TOKEN_COUNTS_LIST1 and + * TOKEN_COUNTS_LIST2 the corresponding counts of the different tokens + * (indexed by the 'token_index' of the positions of each position_list). + * + * From the beginning of each list, PREFIX_LINES lines will be assumed to be + * equal and be excluded from the comparison process. Similarly, SUFFIX_LINES + * at the end of both sequences will be skipped. + * + * The resulting lcs structure will be the return value of this function. + * Allocations will be made from POOL. + */ +svn_diff__lcs_t * +svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */ + svn_diff__position_t *position_list2, /* pointer to tail (ring) */ + svn_diff__token_index_t *token_counts_list1, /* array of counts */ + svn_diff__token_index_t *token_counts_list2, /* array of counts */ + svn_diff__token_index_t num_tokens, /* length of count arrays */ + apr_off_t prefix_lines, + apr_off_t suffix_lines, + apr_pool_t *pool); + + +/* + * Returns number of tokens in a tree + */ +svn_diff__token_index_t +svn_diff__get_node_count(svn_diff__tree_t *tree); + +/* + * Support functions to build a tree of token positions + */ +void +svn_diff__tree_create(svn_diff__tree_t **tree, apr_pool_t *pool); + + +/* + * Get all tokens from a datasource. Return the + * last item in the (circular) list. + */ +svn_error_t * +svn_diff__get_tokens(svn_diff__position_t **position_list, + svn_diff__tree_t *tree, + void *diff_baton, + const svn_diff_fns2_t *vtable, + svn_diff_datasource_e datasource, + apr_off_t prefix_lines, + apr_pool_t *pool); + +/* + * Returns an array with the counts for the tokens in + * the looped linked list given in loop_start. + * num_tokens equals the highest possible token index +1. 
+ */ +svn_diff__token_index_t* +svn_diff__get_token_counts(svn_diff__position_t *loop_start, + svn_diff__token_index_t num_tokens, + apr_pool_t *pool); + +/* Morph a svn_lcs_t into a svn_diff_t. */ +svn_diff_t * +svn_diff__diff(svn_diff__lcs_t *lcs, + apr_off_t original_start, apr_off_t modified_start, + svn_boolean_t want_common, + apr_pool_t *pool); + +void +svn_diff__resolve_conflict(svn_diff_t *hunk, + svn_diff__position_t **position_list1, + svn_diff__position_t **position_list2, + svn_diff__token_index_t num_tokens, + apr_pool_t *pool); + + +/* Normalize the characters pointed to by the buffer BUF (of length *LENGTHP) + * according to the options *OPTS, starting in the state *STATEP. + * + * Adjust *LENGTHP and *STATEP to be the length of the normalized buffer and + * the final state, respectively. + * Normalized data is written to the memory at *TGT. BUF and TGT may point + * to the same memory area. The memory area pointed to by *TGT should be + * large enough to hold *LENGTHP bytes. + * When on return *TGT is not equal to the value passed in, it points somewhere + * into the memory region designated by BUF and *LENGTHP. + */ +void +svn_diff__normalize_buffer(char **tgt, + apr_off_t *lengthp, + svn_diff__normalize_state_t *statep, + const char *buf, + const svn_diff_file_options_t *opts); + +/* Set *OUT_STR to a newline followed by a "\ No newline at end of file" line. + * + * The text will be encoded into HEADER_ENCODING. + */ +svn_error_t * +svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf, + const char *header_encoding, + apr_pool_t *scratch_pool); + +/* Write a unidiff hunk header to OUTPUT_STREAM. + * + * The header will use HUNK_DELIMITER (which should usually be "@@") before + * and after the line-number ranges which are formed from OLD_START, + * OLD_LENGTH, NEW_START and NEW_LENGTH. If HUNK_EXTRA_CONTEXT is not NULL, + * it will be written after the final delimiter, with an intervening space. 
+ * + * The text will be encoded into HEADER_ENCODING. + */ +svn_error_t * +svn_diff__unified_write_hunk_header(svn_stream_t *output_stream, + const char *header_encoding, + const char *hunk_delimiter, + apr_off_t old_start, + apr_off_t old_length, + apr_off_t new_start, + apr_off_t new_length, + const char *hunk_extra_context, + apr_pool_t *scratch_pool); + + +#endif /* DIFF_H */ diff --git a/subversion/libsvn_diff/diff3.c b/subversion/libsvn_diff/diff3.c new file mode 100644 index 0000000..8b7c9b3 --- /dev/null +++ b/subversion/libsvn_diff/diff3.c @@ -0,0 +1,529 @@ +/* + * diff3.c : routines for doing diffs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + + +#include <apr.h> +#include <apr_pools.h> +#include <apr_general.h> + +#include "svn_pools.h" +#include "svn_error.h" +#include "svn_diff.h" +#include "svn_types.h" + +#include "diff.h" + + +void +svn_diff__resolve_conflict(svn_diff_t *hunk, + svn_diff__position_t **position_list1, + svn_diff__position_t **position_list2, + svn_diff__token_index_t num_tokens, + apr_pool_t *pool) +{ + apr_off_t modified_start = hunk->modified_start + 1; + apr_off_t latest_start = hunk->latest_start + 1; + apr_off_t common_length; + apr_off_t modified_length = hunk->modified_length; + apr_off_t latest_length = hunk->latest_length; + svn_diff__position_t *start_position[2]; + svn_diff__position_t *position[2]; + svn_diff__token_index_t *token_counts[2]; + svn_diff__lcs_t *lcs = NULL; + svn_diff__lcs_t **lcs_ref = &lcs; + svn_diff_t **diff_ref = &hunk->resolved_diff; + apr_pool_t *subpool; + + /* First find the starting positions for the + * comparison + */ + + start_position[0] = *position_list1; + start_position[1] = *position_list2; + + while (start_position[0]->offset < modified_start) + start_position[0] = start_position[0]->next; + + while (start_position[1]->offset < latest_start) + start_position[1] = start_position[1]->next; + + position[0] = start_position[0]; + position[1] = start_position[1]; + + common_length = modified_length < latest_length + ? 
modified_length : latest_length; + + while (common_length > 0 + && position[0]->token_index == position[1]->token_index) + { + position[0] = position[0]->next; + position[1] = position[1]->next; + + common_length--; + } + + if (common_length == 0 + && modified_length == latest_length) + { + hunk->type = svn_diff__type_diff_common; + hunk->resolved_diff = NULL; + + *position_list1 = position[0]; + *position_list2 = position[1]; + + return; + } + + hunk->type = svn_diff__type_conflict; + + /* ### If we have a conflict we can try to find the + * ### common parts in it by getting an lcs between + * ### modified (start to start + length) and + * ### latest (start to start + length). + * ### We use this lcs to create a simple diff. Only + * ### where there is a diff between the two, we have + * ### a conflict. + * ### This raises a problem; several common diffs and + * ### conflicts can occur within the same original + * ### block. This needs some thought. + * ### + * ### NB: We can use the node _pointers_ to identify + * ### different tokens + */ + + subpool = svn_pool_create(pool); + + /* Calculate how much of the two sequences was + * actually the same. + */ + common_length = (modified_length < latest_length + ? modified_length : latest_length) + - common_length; + + /* If there were matching symbols at the start of + * both sequences, record that fact. + */ + if (common_length > 0) + { + lcs = apr_palloc(subpool, sizeof(*lcs)); + lcs->next = NULL; + lcs->position[0] = start_position[0]; + lcs->position[1] = start_position[1]; + lcs->length = common_length; + + lcs_ref = &lcs->next; + } + + modified_length -= common_length; + latest_length -= common_length; + + modified_start = start_position[0]->offset; + latest_start = start_position[1]->offset; + + start_position[0] = position[0]; + start_position[1] = position[1]; + + /* Create a new ring for svn_diff__lcs to grok. + * We can safely do this given we don't need the + * positions we processed anymore. 
+ */ + if (modified_length == 0) + { + *position_list1 = position[0]; + position[0] = NULL; + } + else + { + while (--modified_length) + position[0] = position[0]->next; + + *position_list1 = position[0]->next; + position[0]->next = start_position[0]; + } + + if (latest_length == 0) + { + *position_list2 = position[1]; + position[1] = NULL; + } + else + { + while (--latest_length) + position[1] = position[1]->next; + + *position_list2 = position[1]->next; + position[1]->next = start_position[1]; + } + + token_counts[0] = svn_diff__get_token_counts(position[0], num_tokens, + subpool); + token_counts[1] = svn_diff__get_token_counts(position[1], num_tokens, + subpool); + + *lcs_ref = svn_diff__lcs(position[0], position[1], token_counts[0], + token_counts[1], num_tokens, 0, 0, subpool); + + /* Fix up the EOF lcs element in case one of + * the two sequences was NULL. + */ + if ((*lcs_ref)->position[0]->offset == 1) + (*lcs_ref)->position[0] = *position_list1; + + if ((*lcs_ref)->position[1]->offset == 1) + (*lcs_ref)->position[1] = *position_list2; + + /* Produce the resolved diff */ + while (1) + { + if (modified_start < lcs->position[0]->offset + || latest_start < lcs->position[1]->offset) + { + (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); + + (*diff_ref)->type = svn_diff__type_conflict; + (*diff_ref)->original_start = hunk->original_start; + (*diff_ref)->original_length = hunk->original_length; + (*diff_ref)->modified_start = modified_start - 1; + (*diff_ref)->modified_length = lcs->position[0]->offset + - modified_start; + (*diff_ref)->latest_start = latest_start - 1; + (*diff_ref)->latest_length = lcs->position[1]->offset + - latest_start; + (*diff_ref)->resolved_diff = NULL; + + diff_ref = &(*diff_ref)->next; + } + + /* Detect the EOF */ + if (lcs->length == 0) + break; + + modified_start = lcs->position[0]->offset; + latest_start = lcs->position[1]->offset; + + (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); + + (*diff_ref)->type = 
svn_diff__type_diff_common; + (*diff_ref)->original_start = hunk->original_start; + (*diff_ref)->original_length = hunk->original_length; + (*diff_ref)->modified_start = modified_start - 1; + (*diff_ref)->modified_length = lcs->length; + (*diff_ref)->latest_start = latest_start - 1; + (*diff_ref)->latest_length = lcs->length; + (*diff_ref)->resolved_diff = NULL; + + diff_ref = &(*diff_ref)->next; + + modified_start += lcs->length; + latest_start += lcs->length; + + lcs = lcs->next; + } + + *diff_ref = NULL; + + svn_pool_destroy(subpool); +} + + +svn_error_t * +svn_diff_diff3_2(svn_diff_t **diff, + void *diff_baton, + const svn_diff_fns2_t *vtable, + apr_pool_t *pool) +{ + svn_diff__tree_t *tree; + svn_diff__position_t *position_list[3]; + svn_diff__token_index_t num_tokens; + svn_diff__token_index_t *token_counts[3]; + svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, + svn_diff_datasource_modified, + svn_diff_datasource_latest}; + svn_diff__lcs_t *lcs_om; + svn_diff__lcs_t *lcs_ol; + apr_pool_t *subpool; + apr_pool_t *treepool; + apr_off_t prefix_lines = 0; + apr_off_t suffix_lines = 0; + + *diff = NULL; + + subpool = svn_pool_create(pool); + treepool = svn_pool_create(pool); + + svn_diff__tree_create(&tree, treepool); + + SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines, + datasource, 3)); + + SVN_ERR(svn_diff__get_tokens(&position_list[0], + tree, + diff_baton, vtable, + svn_diff_datasource_original, + prefix_lines, + subpool)); + + SVN_ERR(svn_diff__get_tokens(&position_list[1], + tree, + diff_baton, vtable, + svn_diff_datasource_modified, + prefix_lines, + subpool)); + + SVN_ERR(svn_diff__get_tokens(&position_list[2], + tree, + diff_baton, vtable, + svn_diff_datasource_latest, + prefix_lines, + subpool)); + + num_tokens = svn_diff__get_node_count(tree); + + /* Get rid of the tokens, we don't need them to calc the diff */ + if (vtable->token_discard_all != NULL) + vtable->token_discard_all(diff_baton); + + /* We don't 
need the nodes in the tree either anymore, nor the tree itself */ + svn_pool_destroy(treepool); + + token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens, + subpool); + token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens, + subpool); + token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens, + subpool); + + /* Get the lcs for original-modified and original-latest */ + lcs_om = svn_diff__lcs(position_list[0], position_list[1], token_counts[0], + token_counts[1], num_tokens, prefix_lines, + suffix_lines, subpool); + lcs_ol = svn_diff__lcs(position_list[0], position_list[2], token_counts[0], + token_counts[2], num_tokens, prefix_lines, + suffix_lines, subpool); + + /* Produce a merged diff */ + { + svn_diff_t **diff_ref = diff; + + apr_off_t original_start = 1; + apr_off_t modified_start = 1; + apr_off_t latest_start = 1; + apr_off_t original_sync; + apr_off_t modified_sync; + apr_off_t latest_sync; + apr_off_t common_length; + apr_off_t modified_length; + apr_off_t latest_length; + svn_boolean_t is_modified; + svn_boolean_t is_latest; + svn_diff__position_t sentinel_position[2]; + + /* Point the position lists to the start of the list + * so that common_diff/conflict detection actually is + * able to work. 
+ */ + if (position_list[1]) + { + sentinel_position[0].next = position_list[1]->next; + sentinel_position[0].offset = position_list[1]->offset + 1; + position_list[1]->next = &sentinel_position[0]; + position_list[1] = sentinel_position[0].next; + } + else + { + sentinel_position[0].offset = prefix_lines + 1; + sentinel_position[0].next = NULL; + position_list[1] = &sentinel_position[0]; + } + + if (position_list[2]) + { + sentinel_position[1].next = position_list[2]->next; + sentinel_position[1].offset = position_list[2]->offset + 1; + position_list[2]->next = &sentinel_position[1]; + position_list[2] = sentinel_position[1].next; + } + else + { + sentinel_position[1].offset = prefix_lines + 1; + sentinel_position[1].next = NULL; + position_list[2] = &sentinel_position[1]; + } + + while (1) + { + /* Find the sync points */ + while (1) + { + if (lcs_om->position[0]->offset > lcs_ol->position[0]->offset) + { + original_sync = lcs_om->position[0]->offset; + + while (lcs_ol->position[0]->offset + lcs_ol->length + < original_sync) + lcs_ol = lcs_ol->next; + + /* If the sync point is the EOF, and our current lcs segment + * doesn't reach as far as EOF, we need to skip this segment. + */ + if (lcs_om->length == 0 && lcs_ol->length > 0 + && lcs_ol->position[0]->offset + lcs_ol->length + == original_sync + && lcs_ol->position[1]->offset + lcs_ol->length + != lcs_ol->next->position[1]->offset) + lcs_ol = lcs_ol->next; + + if (lcs_ol->position[0]->offset <= original_sync) + break; + } + else + { + original_sync = lcs_ol->position[0]->offset; + + while (lcs_om->position[0]->offset + lcs_om->length + < original_sync) + lcs_om = lcs_om->next; + + /* If the sync point is the EOF, and our current lcs segment + * doesn't reach as far as EOF, we need to skip this segment. 
+ */ + if (lcs_ol->length == 0 && lcs_om->length > 0 + && lcs_om->position[0]->offset + lcs_om->length + == original_sync + && lcs_om->position[1]->offset + lcs_om->length + != lcs_om->next->position[1]->offset) + lcs_om = lcs_om->next; + + if (lcs_om->position[0]->offset <= original_sync) + break; + } + } + + modified_sync = lcs_om->position[1]->offset + + (original_sync - lcs_om->position[0]->offset); + latest_sync = lcs_ol->position[1]->offset + + (original_sync - lcs_ol->position[0]->offset); + + /* Determine what is modified, if anything */ + is_modified = lcs_om->position[0]->offset - original_start > 0 + || lcs_om->position[1]->offset - modified_start > 0; + + is_latest = lcs_ol->position[0]->offset - original_start > 0 + || lcs_ol->position[1]->offset - latest_start > 0; + + if (is_modified || is_latest) + { + modified_length = modified_sync - modified_start; + latest_length = latest_sync - latest_start; + + (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); + + (*diff_ref)->original_start = original_start - 1; + (*diff_ref)->original_length = original_sync - original_start; + (*diff_ref)->modified_start = modified_start - 1; + (*diff_ref)->modified_length = modified_length; + (*diff_ref)->latest_start = latest_start - 1; + (*diff_ref)->latest_length = latest_length; + (*diff_ref)->resolved_diff = NULL; + + if (is_modified && is_latest) + { + svn_diff__resolve_conflict(*diff_ref, + &position_list[1], + &position_list[2], + num_tokens, + pool); + } + else if (is_modified) + { + (*diff_ref)->type = svn_diff__type_diff_modified; + } + else + { + (*diff_ref)->type = svn_diff__type_diff_latest; + } + + diff_ref = &(*diff_ref)->next; + } + + /* Detect EOF */ + if (lcs_om->length == 0 || lcs_ol->length == 0) + break; + + modified_length = lcs_om->length + - (original_sync - lcs_om->position[0]->offset); + latest_length = lcs_ol->length + - (original_sync - lcs_ol->position[0]->offset); + common_length = modified_length < latest_length + ? 
modified_length : latest_length; + + (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); + + (*diff_ref)->type = svn_diff__type_common; + (*diff_ref)->original_start = original_sync - 1; + (*diff_ref)->original_length = common_length; + (*diff_ref)->modified_start = modified_sync - 1; + (*diff_ref)->modified_length = common_length; + (*diff_ref)->latest_start = latest_sync - 1; + (*diff_ref)->latest_length = common_length; + (*diff_ref)->resolved_diff = NULL; + + diff_ref = &(*diff_ref)->next; + + /* Set the new offsets */ + original_start = original_sync + common_length; + modified_start = modified_sync + common_length; + latest_start = latest_sync + common_length; + + /* Make it easier for diff_common/conflict detection + by recording last lcs start positions + */ + if (position_list[1]->offset < lcs_om->position[1]->offset) + position_list[1] = lcs_om->position[1]; + + if (position_list[2]->offset < lcs_ol->position[1]->offset) + position_list[2] = lcs_ol->position[1]; + + /* Make sure we are pointing to lcs entries beyond + * the range we just processed + */ + while (original_start >= lcs_om->position[0]->offset + lcs_om->length + && lcs_om->length > 0) + { + lcs_om = lcs_om->next; + } + + while (original_start >= lcs_ol->position[0]->offset + lcs_ol->length + && lcs_ol->length > 0) + { + lcs_ol = lcs_ol->next; + } + } + + *diff_ref = NULL; + } + + svn_pool_destroy(subpool); + + return SVN_NO_ERROR; +} diff --git a/subversion/libsvn_diff/diff4.c b/subversion/libsvn_diff/diff4.c new file mode 100644 index 0000000..9f3cb8c --- /dev/null +++ b/subversion/libsvn_diff/diff4.c @@ -0,0 +1,314 @@ +/* + * diff.c : routines for doing diffs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + + +#include <apr.h> +#include <apr_pools.h> +#include <apr_general.h> + +#include "svn_pools.h" +#include "svn_error.h" +#include "svn_diff.h" +#include "svn_types.h" + +#include "diff.h" + +/* + * Variance adjustment rules: + * + * See notes/variance-adjusted-patching.html + * + * ###: Expand this comment to contain the full set of adjustment + * ###: rules instead of pointing to a webpage. + */ + +/* + * In the text below consider the following: + * + * O = Original + * M = Modified + * L = Latest + * A = Ancestor + * X:Y = diff between X and Y + * X:Y:Z = 3-way diff between X, Y and Z + * P = O:L, possibly adjusted + * + * diff4 -- Variance adjusted diff algorithm + * + * 1. Create a diff O:L and call that P. + * + * 2. Morph P into a 3-way diff by performing the following + * transformation: O:L -> O:O:L. + * + * 3. Create a diff A:O. + * + * 4. Using A:O... + * + * #. Using M:A... + * + * #. Resolve conflicts... + * + + 1. Out-range added line: decrement the line numbers in every hunk in P + that comes after the addition. This undoes the effect of the add, since + the add never happened in D. + + 2. Out-range deleted line: increment the line numbers in every hunk in P + that comes after the deletion. This undoes the effect of the deletion, + since the deletion never happened in D. + + 3. 
Out-range edited line: do nothing. Out-range edits are irrelevant to P. + + 4. Added line in context range in P: remove the corresponding line from + the context, optionally replacing it with new context based on that + region in M, and adjust line numbers and mappings appropriately. + + 5. Added line in affected text range in P: this is a dependency problem + -- part of the change T:18-T:19 depends on changes introduced to T after + B branched. There are several possible behaviors, depending on what the + user wants. One is to generate an informative error, stating that + T:18-T:19 depends on some other change (T:N-T:M, where N>=8, M<=18, + and M-N == 1); the exact revisions can be discovered automatically using + the same process as "cvs annotate", though it may take some time to do + so. Another option is to include the change in P, as an insertion of the + "after" version of the text, and adjust line numbers and mappings + accordingly. (And if all this isn't sounding a lot like a directory + merge algorithm, try drinking more of the Kool-Aid.) A third option is + to include it as an insertion, but with metadata (such as CVS-style + conflict markers) indicating that the line attempting to be patched + does not exist in B. + + 6. Deleted line that is in-range in P: request another universe -- this + situation can't happen in ours. + + 7. In-range edited line: reverse that edit in the "before" version of the + corresponding line in the appropriate hunk in P, to obtain the version of + the line that will be found in B when P is applied. +*/ + + +static void +adjust_diff(svn_diff_t *diff, svn_diff_t *adjust) +{ + svn_diff_t *hunk; + apr_off_t range_start; + apr_off_t range_end; + apr_off_t adjustment; + + for (; adjust; adjust = adjust->next) + { + range_start = adjust->modified_start; + range_end = range_start + adjust->modified_length; + adjustment = adjust->original_length - adjust->modified_length; + + /* No change in line count, so no modifications. 
[3, 7] */ + if (adjustment == 0) + continue; + + for (hunk = diff; hunk; hunk = hunk->next) + { + /* Changes are in the range before this hunk. Adjust the start + * of the hunk. [1, 2] + */ + if (hunk->modified_start >= range_end) + { + hunk->modified_start += adjustment; + continue; + } + + /* Changes are in the range beyond this hunk. No adjustments + * needed. [1, 2] + */ + if (hunk->modified_start + hunk->modified_length <= range_start) + continue; + + /* From here on changes are in the range of this hunk. */ + + /* This is a context hunk. Adjust the length. [4] + */ + if (hunk->type == svn_diff__type_diff_modified) + { + hunk->modified_length += adjustment; + continue; + } + + /* Mark as conflicted. This happens in the reverse case when a line + * is added in range and in the forward case when a line is deleted + * in range. [5 (reverse), 6 (forward)] + */ + if (adjustment < 0) + hunk->type = svn_diff__type_conflict; + + /* Adjust the length of this hunk (reverse the change). [5, 6] */ + hunk->modified_length -= adjustment; + } + } +} + +svn_error_t * +svn_diff_diff4_2(svn_diff_t **diff, + void *diff_baton, + const svn_diff_fns2_t *vtable, + apr_pool_t *pool) +{ + svn_diff__tree_t *tree; + svn_diff__position_t *position_list[4]; + svn_diff__token_index_t num_tokens; + svn_diff__token_index_t *token_counts[4]; + svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, + svn_diff_datasource_modified, + svn_diff_datasource_latest, + svn_diff_datasource_ancestor}; + svn_diff__lcs_t *lcs_ol; + svn_diff__lcs_t *lcs_adjust; + svn_diff_t *diff_ol; + svn_diff_t *diff_adjust; + svn_diff_t *hunk; + apr_pool_t *subpool; + apr_pool_t *subpool2; + apr_pool_t *subpool3; + apr_off_t prefix_lines = 0; + apr_off_t suffix_lines = 0; + + *diff = NULL; + + subpool = svn_pool_create(pool); + subpool2 = svn_pool_create(subpool); + subpool3 = svn_pool_create(subpool2); + + svn_diff__tree_create(&tree, subpool3); + + SVN_ERR(vtable->datasources_open(diff_baton, 
&prefix_lines, &suffix_lines, + datasource, 4)); + + SVN_ERR(svn_diff__get_tokens(&position_list[0], + tree, + diff_baton, vtable, + svn_diff_datasource_original, + prefix_lines, + subpool2)); + + SVN_ERR(svn_diff__get_tokens(&position_list[1], + tree, + diff_baton, vtable, + svn_diff_datasource_modified, + prefix_lines, + subpool)); + + SVN_ERR(svn_diff__get_tokens(&position_list[2], + tree, + diff_baton, vtable, + svn_diff_datasource_latest, + prefix_lines, + subpool)); + + SVN_ERR(svn_diff__get_tokens(&position_list[3], + tree, + diff_baton, vtable, + svn_diff_datasource_ancestor, + prefix_lines, + subpool2)); + + num_tokens = svn_diff__get_node_count(tree); + + /* Get rid of the tokens, we don't need them to calc the diff */ + if (vtable->token_discard_all != NULL) + vtable->token_discard_all(diff_baton); + + /* We don't need the nodes in the tree either anymore, nor the tree itself */ + svn_pool_clear(subpool3); + + token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens, + subpool); + token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens, + subpool); + token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens, + subpool); + token_counts[3] = svn_diff__get_token_counts(position_list[3], num_tokens, + subpool); + + /* Get the lcs for original - latest */ + lcs_ol = svn_diff__lcs(position_list[0], position_list[2], + token_counts[0], token_counts[2], + num_tokens, prefix_lines, + suffix_lines, subpool3); + diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool); + + svn_pool_clear(subpool3); + + for (hunk = diff_ol; hunk; hunk = hunk->next) + { + hunk->latest_start = hunk->modified_start; + hunk->latest_length = hunk->modified_length; + hunk->modified_start = hunk->original_start; + hunk->modified_length = hunk->original_length; + + if (hunk->type == svn_diff__type_diff_modified) + hunk->type = svn_diff__type_diff_latest; + else + hunk->type = svn_diff__type_diff_modified; + } + + /* Get the lcs for common 
ancestor - original + * Do reverse adjustements + */ + lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], + token_counts[3], token_counts[2], + num_tokens, prefix_lines, + suffix_lines, subpool3); + diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3); + adjust_diff(diff_ol, diff_adjust); + + svn_pool_clear(subpool3); + + /* Get the lcs for modified - common ancestor + * Do forward adjustments + */ + lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], + token_counts[1], token_counts[3], + num_tokens, prefix_lines, + suffix_lines, subpool3); + diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3); + adjust_diff(diff_ol, diff_adjust); + + /* Get rid of the position lists for original and ancestor, and delete + * our scratchpool. + */ + svn_pool_destroy(subpool2); + + /* Now we try and resolve the conflicts we encountered */ + for (hunk = diff_ol; hunk; hunk = hunk->next) + { + if (hunk->type == svn_diff__type_conflict) + { + svn_diff__resolve_conflict(hunk, &position_list[1], + &position_list[2], num_tokens, pool); + } + } + + svn_pool_destroy(subpool); + + *diff = diff_ol; + + return SVN_NO_ERROR; +} diff --git a/subversion/libsvn_diff/diff_file.c b/subversion/libsvn_diff/diff_file.c new file mode 100644 index 0000000..e70c2f9 --- /dev/null +++ b/subversion/libsvn_diff/diff_file.c @@ -0,0 +1,2414 @@ +/* + * diff_file.c : routines for doing diffs on files + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + + +#include <apr.h> +#include <apr_pools.h> +#include <apr_general.h> +#include <apr_file_io.h> +#include <apr_file_info.h> +#include <apr_time.h> +#include <apr_mmap.h> +#include <apr_getopt.h> + +#include "svn_error.h" +#include "svn_diff.h" +#include "svn_types.h" +#include "svn_string.h" +#include "svn_subst.h" +#include "svn_io.h" +#include "svn_utf.h" +#include "svn_pools.h" +#include "diff.h" +#include "svn_private_config.h" +#include "svn_path.h" +#include "svn_ctype.h" + +#include "private/svn_utf_private.h" +#include "private/svn_eol_private.h" +#include "private/svn_dep_compat.h" +#include "private/svn_adler32.h" +#include "private/svn_diff_private.h" + +/* A token, i.e. a line read from a file. */ +typedef struct svn_diff__file_token_t +{ + /* Next token in free list. */ + struct svn_diff__file_token_t *next; + svn_diff_datasource_e datasource; + /* Offset in the datasource. */ + apr_off_t offset; + /* Offset of the normalized token (may skip leading whitespace) */ + apr_off_t norm_offset; + /* Total length - before normalization. */ + apr_off_t raw_length; + /* Total length - after normalization. 
*/ + apr_off_t length; +} svn_diff__file_token_t; + + +typedef struct svn_diff__file_baton_t +{ + const svn_diff_file_options_t *options; + + struct file_info { + const char *path; /* path to this file, absolute or relative to CWD */ + + /* All the following fields are active while this datasource is open */ + apr_file_t *file; /* handle of this file */ + apr_off_t size; /* total raw size in bytes of this file */ + + /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */ + int chunk; /* the current chunk number, zero-based */ + char *buffer; /* a buffer containing the current chunk */ + char *curp; /* current position in the current chunk */ + char *endp; /* next memory address after the current chunk */ + + svn_diff__normalize_state_t normalize_state; + + /* Where the identical suffix starts in this datasource */ + int suffix_start_chunk; + apr_off_t suffix_offset_in_chunk; + } files[4]; + + /* List of free tokens that may be reused. */ + svn_diff__file_token_t *tokens; + + apr_pool_t *pool; +} svn_diff__file_baton_t; + +static int +datasource_to_index(svn_diff_datasource_e datasource) +{ + switch (datasource) + { + case svn_diff_datasource_original: + return 0; + + case svn_diff_datasource_modified: + return 1; + + case svn_diff_datasource_latest: + return 2; + + case svn_diff_datasource_ancestor: + return 3; + } + + return -1; +} + +/* Files are read in chunks of 128k. There is no support for this number + * whatsoever. If there is a number someone comes up with that has some + * argumentation, let's use that. + */ +/* If you change this number, update test_norm_offset(), + * test_identical_suffix() and and test_token_compare() in diff-diff3-test.c. 
+ */ +#define CHUNK_SHIFT 17 +#define CHUNK_SIZE (1 << CHUNK_SHIFT) + +#define chunk_to_offset(chunk) ((chunk) << CHUNK_SHIFT) +#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT) +#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1)) + + +/* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for + * *LENGTH. The actual bytes read are stored in *LENGTH on return. + */ +static APR_INLINE svn_error_t * +read_chunk(apr_file_t *file, const char *path, + char *buffer, apr_off_t length, + apr_off_t offset, apr_pool_t *pool) +{ + /* XXX: The final offset may not be the one we asked for. + * XXX: Check. + */ + SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool)); + return svn_io_file_read_full2(file, buffer, (apr_size_t) length, + NULL, NULL, pool); +} + + +/* Map or read a file at PATH. *BUFFER will point to the file + * contents; if the file was mapped, *FILE and *MM will contain the + * mmap context; otherwise they will be NULL. SIZE will contain the + * file size. Allocate from POOL. + */ +#if APR_HAS_MMAP +#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME, +#define MMAP_T_ARG(NAME) &(NAME), +#else +#define MMAP_T_PARAM(NAME) +#define MMAP_T_ARG(NAME) +#endif + +static svn_error_t * +map_or_read_file(apr_file_t **file, + MMAP_T_PARAM(mm) + char **buffer, apr_off_t *size, + const char *path, apr_pool_t *pool) +{ + apr_finfo_t finfo; + apr_status_t rv; + + *buffer = NULL; + + SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool)); + SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool)); + +#if APR_HAS_MMAP + if (finfo.size > APR_MMAP_THRESHOLD) + { + rv = apr_mmap_create(mm, *file, 0, (apr_size_t) finfo.size, + APR_MMAP_READ, pool); + if (rv == APR_SUCCESS) + { + *buffer = (*mm)->mm; + } + + /* On failure we just fall through and try reading the file into + * memory instead. 
+ */ + } +#endif /* APR_HAS_MMAP */ + + if (*buffer == NULL && finfo.size > 0) + { + *buffer = apr_palloc(pool, (apr_size_t) finfo.size); + + SVN_ERR(svn_io_file_read_full2(*file, *buffer, (apr_size_t) finfo.size, + NULL, NULL, pool)); + + /* Since we have the entire contents of the file we can + * close it now. + */ + SVN_ERR(svn_io_file_close(*file, pool)); + + *file = NULL; + } + + *size = finfo.size; + + return SVN_NO_ERROR; +} + + +/* For all files in the FILE array, increment the curp pointer. If a file + * points before the beginning of file, let it point at the first byte again. + * If the end of the current chunk is reached, read the next chunk in the + * buffer and point curp to the start of the chunk. If EOF is reached, set + * curp equal to endp to indicate EOF. */ +#define INCREMENT_POINTERS(all_files, files_len, pool) \ + do { \ + apr_size_t svn_macro__i; \ + \ + for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \ + { \ + if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\ + (all_files)[svn_macro__i].curp++; \ + else \ + SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool))); \ + } \ + } while (0) + + +/* For all files in the FILE array, decrement the curp pointer. If the + * start of a chunk is reached, read the previous chunk in the buffer and + * point curp to the last byte of the chunk. If the beginning of a FILE is + * reached, set chunk to -1 to indicate BOF. 
*/ +#define DECREMENT_POINTERS(all_files, files_len, pool) \ + do { \ + apr_size_t svn_macro__i; \ + \ + for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \ + { \ + if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \ + (all_files)[svn_macro__i].curp--; \ + else \ + SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool))); \ + } \ + } while (0) + + +static svn_error_t * +increment_chunk(struct file_info *file, apr_pool_t *pool) +{ + apr_off_t length; + apr_off_t last_chunk = offset_to_chunk(file->size); + + if (file->chunk == -1) + { + /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */ + file->chunk = 0; + file->curp = file->buffer; + } + else if (file->chunk == last_chunk) + { + /* We are at the last chunk. Indicate EOF by setting curp == endp. */ + file->curp = file->endp; + } + else + { + /* There are still chunks left. Read next chunk and reset pointers. */ + file->chunk++; + length = file->chunk == last_chunk ? + offset_in_chunk(file->size) : CHUNK_SIZE; + SVN_ERR(read_chunk(file->file, file->path, file->buffer, + length, chunk_to_offset(file->chunk), + pool)); + file->endp = file->buffer + length; + file->curp = file->buffer; + } + + return SVN_NO_ERROR; +} + + +static svn_error_t * +decrement_chunk(struct file_info *file, apr_pool_t *pool) +{ + if (file->chunk == 0) + { + /* We are already at the first chunk. Indicate BOF (Beginning Of File) + by setting chunk = -1 and curp = endp - 1. Both conditions are + important. They help the increment step to catch the BOF situation + in an efficient way. */ + file->chunk--; + file->curp = file->endp - 1; + } + else + { + /* Read previous chunk and reset pointers. 
*/ + file->chunk--; + SVN_ERR(read_chunk(file->file, file->path, file->buffer, + CHUNK_SIZE, chunk_to_offset(file->chunk), + pool)); + file->endp = file->buffer + CHUNK_SIZE; + file->curp = file->endp - 1; + } + + return SVN_NO_ERROR; +} + + +/* Check whether one of the FILEs has its pointers 'before' the beginning of + * the file (this can happen while scanning backwards). This is the case if + * one of them has chunk == -1. */ +static svn_boolean_t +is_one_at_bof(struct file_info file[], apr_size_t file_len) +{ + apr_size_t i; + + for (i = 0; i < file_len; i++) + if (file[i].chunk == -1) + return TRUE; + + return FALSE; +} + +/* Check whether one of the FILEs has its pointers at EOF (this is the case if + * one of them has curp == endp (this can only happen at the last chunk)) */ +static svn_boolean_t +is_one_at_eof(struct file_info file[], apr_size_t file_len) +{ + apr_size_t i; + + for (i = 0; i < file_len; i++) + if (file[i].curp == file[i].endp) + return TRUE; + + return FALSE; +} + +/* Quickly determine whether there is a eol char in CHUNK. + * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start). + */ + +#if SVN_UNALIGNED_ACCESS_IS_OK +static svn_boolean_t contains_eol(apr_uintptr_t chunk) +{ + apr_uintptr_t r_test = chunk ^ SVN__R_MASK; + apr_uintptr_t n_test = chunk ^ SVN__N_MASK; + + r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET; + n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET; + + return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET; +} +#endif + +/* Find the prefix which is identical between all elements of the FILE array. + * Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be + * set to TRUE if one of the FILEs reached its end while scanning prefix, + * i.e. at least one file consisted entirely of prefix. Otherwise, + * REACHED_ONE_EOF is set to FALSE. 
+ * + * After this function is finished, the buffers, chunks, curp's and endp's + * of the FILEs are set to point at the first byte after the prefix. */ +static svn_error_t * +find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines, + struct file_info file[], apr_size_t file_len, + apr_pool_t *pool) +{ + svn_boolean_t had_cr = FALSE; + svn_boolean_t is_match; + apr_off_t lines = 0; + apr_size_t i; + + *reached_one_eof = FALSE; + + for (i = 1, is_match = TRUE; i < file_len; i++) + is_match = is_match && *file[0].curp == *file[i].curp; + while (is_match) + { +#if SVN_UNALIGNED_ACCESS_IS_OK + apr_ssize_t max_delta, delta; +#endif /* SVN_UNALIGNED_ACCESS_IS_OK */ + + /* ### TODO: see if we can take advantage of + diff options like ignore_eol_style or ignore_space. */ + /* check for eol, and count */ + if (*file[0].curp == '\r') + { + lines++; + had_cr = TRUE; + } + else if (*file[0].curp == '\n' && !had_cr) + { + lines++; + } + else + { + had_cr = FALSE; + } + + INCREMENT_POINTERS(file, file_len, pool); + +#if SVN_UNALIGNED_ACCESS_IS_OK + + /* Try to advance as far as possible with machine-word granularity. + * Determine how far we may advance with chunky ops without reaching + * endp for any of the files. + * Signedness is important here if curp gets close to endp. + */ + max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t); + for (i = 1; i < file_len; i++) + { + delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t); + if (delta < max_delta) + max_delta = delta; + } + + is_match = TRUE; + for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t)) + { + apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta); + if (contains_eol(chunk)) + break; + + for (i = 1; i < file_len; i++) + if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta)) + { + is_match = FALSE; + break; + } + + if (! 
is_match) + break; + } + + if (delta /* > 0*/) + { + /* We either found a mismatch or an EOL at or shortly behind curp+delta + * or we cannot proceed with chunky ops without exceeding endp. + * In any way, everything up to curp + delta is equal and not an EOL. + */ + for (i = 0; i < file_len; i++) + file[i].curp += delta; + + /* Skipped data without EOL markers, so last char was not a CR. */ + had_cr = FALSE; + } +#endif + + *reached_one_eof = is_one_at_eof(file, file_len); + if (*reached_one_eof) + break; + else + for (i = 1, is_match = TRUE; i < file_len; i++) + is_match = is_match && *file[0].curp == *file[i].curp; + } + + if (had_cr) + { + /* Check if we ended in the middle of a \r\n for one file, but \r for + another. If so, back up one byte, so the next loop will back up + the entire line. Also decrement lines, since we counted one + too many for the \r. */ + svn_boolean_t ended_at_nonmatching_newline = FALSE; + for (i = 0; i < file_len; i++) + if (file[i].curp < file[i].endp) + ended_at_nonmatching_newline = ended_at_nonmatching_newline + || *file[i].curp == '\n'; + if (ended_at_nonmatching_newline) + { + lines--; + DECREMENT_POINTERS(file, file_len, pool); + } + } + + /* Back up one byte, so we point at the last identical byte */ + DECREMENT_POINTERS(file, file_len, pool); + + /* Back up to the last eol sequence (\n, \r\n or \r) */ + while (!is_one_at_bof(file, file_len) && + *file[0].curp != '\n' && *file[0].curp != '\r') + DECREMENT_POINTERS(file, file_len, pool); + + /* Slide one byte forward, to point past the eol sequence */ + INCREMENT_POINTERS(file, file_len, pool); + + *prefix_lines = lines; + + return SVN_NO_ERROR; +} + + +/* The number of identical suffix lines to keep with the middle section. These + * lines are not eliminated as suffix, and can be picked up by the token + * parsing and lcs steps. 
This is mainly for backward compatibility with + * the previous diff (and blame) output (if there are multiple diff solutions, + * our lcs algorithm prefers taking common lines from the start, rather than + * from the end. By giving it back some suffix lines, we give it some wiggle + * room to find the exact same diff as before). + * + * The number 50 is more or less arbitrary, based on some real-world tests + * with big files (and then doubling the required number to be on the safe + * side). This has a negligible effect on the power of the optimization. */ +/* If you change this number, update test_identical_suffix() in diff-diff3-test.c */ +#ifndef SUFFIX_LINES_TO_KEEP +#define SUFFIX_LINES_TO_KEEP 50 +#endif + +/* Find the suffix which is identical between all elements of the FILE array. + * Return the number of suffix lines in SUFFIX_LINES. + * + * Before this function is called the FILEs' pointers and chunks should be + * positioned right after the identical prefix (which is the case after + * find_identical_prefix), so we can determine where suffix scanning should + * ultimately stop. */ +static svn_error_t * +find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[], + apr_size_t file_len, apr_pool_t *pool) +{ + struct file_info file_for_suffix[4] = { { 0 } }; + apr_off_t length[4]; + apr_off_t suffix_min_chunk0; + apr_off_t suffix_min_offset0; + apr_off_t min_file_size; + int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP; + svn_boolean_t is_match; + apr_off_t lines = 0; + svn_boolean_t had_cr; + svn_boolean_t had_nl; + apr_size_t i; + + /* Initialize file_for_suffix[]. + Read last chunk, position curp at last byte. 
*/ + for (i = 0; i < file_len; i++) + { + file_for_suffix[i].path = file[i].path; + file_for_suffix[i].file = file[i].file; + file_for_suffix[i].size = file[i].size; + file_for_suffix[i].chunk = + (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */ + length[i] = offset_in_chunk(file_for_suffix[i].size); + if (length[i] == 0) + { + /* last chunk is an empty chunk -> start at next-to-last chunk */ + file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1; + length[i] = CHUNK_SIZE; + } + + if (file_for_suffix[i].chunk == file[i].chunk) + { + /* Prefix ended in last chunk, so we can reuse the prefix buffer */ + file_for_suffix[i].buffer = file[i].buffer; + } + else + { + /* There is at least more than 1 chunk, + so allocate full chunk size buffer */ + file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE); + SVN_ERR(read_chunk(file_for_suffix[i].file, file_for_suffix[i].path, + file_for_suffix[i].buffer, length[i], + chunk_to_offset(file_for_suffix[i].chunk), + pool)); + } + file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i]; + file_for_suffix[i].curp = file_for_suffix[i].endp - 1; + } + + /* Get the chunk and pointer offset (for file[0]) at which we should stop + scanning backward for the identical suffix, i.e. when we reach prefix. */ + suffix_min_chunk0 = file[0].chunk; + suffix_min_offset0 = file[0].curp - file[0].buffer; + + /* Compensate if other files are smaller than file[0] */ + for (i = 1, min_file_size = file[0].size; i < file_len; i++) + if (file[i].size < min_file_size) + min_file_size = file[i].size; + if (file[0].size > min_file_size) + { + suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE; + suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE; + } + + /* Scan backwards until mismatch or until we reach the prefix. 
*/ + for (i = 1, is_match = TRUE; i < file_len; i++) + is_match = is_match + && *file_for_suffix[0].curp == *file_for_suffix[i].curp; + if (is_match && *file_for_suffix[0].curp != '\r' + && *file_for_suffix[0].curp != '\n') + /* Count an extra line for the last line not ending in an eol. */ + lines++; + + had_nl = FALSE; + while (is_match) + { + svn_boolean_t reached_prefix; +#if SVN_UNALIGNED_ACCESS_IS_OK + /* Initialize the minimum pointer positions. */ + const char *min_curp[4]; + svn_boolean_t can_read_word; +#endif /* SVN_UNALIGNED_ACCESS_IS_OK */ + + /* ### TODO: see if we can take advantage of + diff options like ignore_eol_style or ignore_space. */ + /* check for eol, and count */ + if (*file_for_suffix[0].curp == '\n') + { + lines++; + had_nl = TRUE; + } + else if (*file_for_suffix[0].curp == '\r' && !had_nl) + { + lines++; + } + else + { + had_nl = FALSE; + } + + DECREMENT_POINTERS(file_for_suffix, file_len, pool); + +#if SVN_UNALIGNED_ACCESS_IS_OK + for (i = 0; i < file_len; i++) + min_curp[i] = file_for_suffix[i].buffer; + + /* If we are in the same chunk that contains the last part of the common + prefix, use the min_curp[0] pointer to make sure we don't get a + suffix that overlaps the already determined common prefix. */ + if (file_for_suffix[0].chunk == suffix_min_chunk0) + min_curp[0] += suffix_min_offset0; + + /* Scan quickly by reading with machine-word granularity. */ + for (i = 0, can_read_word = TRUE; i < file_len; i++) + can_read_word = can_read_word + && ( (file_for_suffix[i].curp + 1 + - sizeof(apr_uintptr_t)) + > min_curp[i]); + while (can_read_word) + { + apr_uintptr_t chunk; + + /* For each file curp is positioned at the current byte, but we + want to examine the current byte and the ones before the current + location as one machine word. 
*/ + + chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1 + - sizeof(apr_uintptr_t)); + if (contains_eol(chunk)) + break; + + for (i = 1, is_match = TRUE; i < file_len; i++) + is_match = is_match + && ( chunk + == *(const apr_uintptr_t *) + (file_for_suffix[i].curp + 1 + - sizeof(apr_uintptr_t))); + + if (! is_match) + break; + + for (i = 0; i < file_len; i++) + { + file_for_suffix[i].curp -= sizeof(apr_uintptr_t); + can_read_word = can_read_word + && ( (file_for_suffix[i].curp + 1 + - sizeof(apr_uintptr_t)) + > min_curp[i]); + } + + /* We skipped some bytes, so there are no closing EOLs */ + had_nl = FALSE; + had_cr = FALSE; + } + + /* The > min_curp[i] check leaves at least one final byte for checking + in the non block optimized case below. */ +#endif + + reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0 + && (file_for_suffix[0].curp - file_for_suffix[0].buffer) + == suffix_min_offset0; + if (reached_prefix || is_one_at_bof(file_for_suffix, file_len)) + break; + + is_match = TRUE; + for (i = 1; i < file_len; i++) + is_match = is_match + && *file_for_suffix[0].curp == *file_for_suffix[i].curp; + } + + /* Slide one byte forward, to point at the first byte of identical suffix */ + INCREMENT_POINTERS(file_for_suffix, file_len, pool); + + /* Slide forward until we find an eol sequence to add the rest of the line + we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least + one file reaches its end. */ + do + { + had_cr = FALSE; + while (!is_one_at_eof(file_for_suffix, file_len) + && *file_for_suffix[0].curp != '\n' + && *file_for_suffix[0].curp != '\r') + INCREMENT_POINTERS(file_for_suffix, file_len, pool); + + /* Slide one or two more bytes, to point past the eol. 
*/ + if (!is_one_at_eof(file_for_suffix, file_len) + && *file_for_suffix[0].curp == '\r') + { + lines--; + had_cr = TRUE; + INCREMENT_POINTERS(file_for_suffix, file_len, pool); + } + if (!is_one_at_eof(file_for_suffix, file_len) + && *file_for_suffix[0].curp == '\n') + { + if (!had_cr) + lines--; + INCREMENT_POINTERS(file_for_suffix, file_len, pool); + } + } + while (!is_one_at_eof(file_for_suffix, file_len) + && suffix_lines_to_keep--); + + if (is_one_at_eof(file_for_suffix, file_len)) + lines = 0; + + /* Save the final suffix information in the original file_info */ + for (i = 0; i < file_len; i++) + { + file[i].suffix_start_chunk = file_for_suffix[i].chunk; + file[i].suffix_offset_in_chunk = + file_for_suffix[i].curp - file_for_suffix[i].buffer; + } + + *suffix_lines = lines; + + return SVN_NO_ERROR; +} + + +/* Let FILE stand for the array of file_info struct elements of BATON->files + * that are indexed by the elements of the DATASOURCE array. + * BATON's type is (svn_diff__file_baton_t *). + * + * For each file in the FILE array, open the file at FILE.path; initialize + * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a + * buffer and read the first chunk. Then find the prefix and suffix lines + * which are identical between all the files. Return the number of identical + * prefix lines in PREFIX_LINES, and the number of identical suffix lines in + * SUFFIX_LINES. + * + * Finding the identical prefix and suffix allows us to exclude those from the + * rest of the diff algorithm, which increases performance by reducing the + * problem space. + * + * Implements svn_diff_fns2_t::datasources_open. 
*/
static svn_error_t *
datasources_open(void *baton,
                 apr_off_t *prefix_lines,
                 apr_off_t *suffix_lines,
                 const svn_diff_datasource_e *datasources,
                 apr_size_t datasources_len)
{
  svn_diff__file_baton_t *file_baton = baton;
  struct file_info files[4];
  apr_finfo_t finfo[4];
  apr_off_t length[4];
#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
  svn_boolean_t reached_one_eof;
#endif
  apr_size_t i;

  /* Make sure prefix_lines and suffix_lines are set correctly, even if we
   * exit early because one of the files is empty. */
  *prefix_lines = 0;
  *suffix_lines = 0;

  /* Open datasources and read first chunk */
  for (i = 0; i < datasources_len; i++)
    {
      struct file_info *file
          = &file_baton->files[datasource_to_index(datasources[i])];
      SVN_ERR(svn_io_file_open(&file->file, file->path,
                               APR_READ, APR_OS_DEFAULT, file_baton->pool));
      SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
                                   file->file, file_baton->pool));
      file->size = finfo[i].size;
      /* Read at most one CHUNK_SIZE worth of data up front. */
      length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
      file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
                         length[i], 0, file_baton->pool));
      file->endp = file->buffer + length[i];
      file->curp = file->buffer;
      /* Set suffix_start_chunk to a guard value, so if suffix scanning is
       * skipped because one of the files is empty, or because of
       * reached_one_eof, we can still easily check for the suffix during
       * token reading (datasource_get_next_token). */
      file->suffix_start_chunk = -1;

      files[i] = *file;
    }

  for (i = 0; i < datasources_len; i++)
    if (length[i] == 0)
      /* There will not be any identical prefix/suffix, so we're done. */
      return SVN_NO_ERROR;

#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING

  SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
                                files, datasources_len, file_baton->pool));

  if (!reached_one_eof)
    /* No file consisted totally of identical prefix,
     * so there may be some identical suffix.  */
    SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
                                  file_baton->pool));

#endif

  /* Copy local results back to baton. */
  for (i = 0; i < datasources_len; i++)
    file_baton->files[datasource_to_index(datasources[i])] = files[i];

  return SVN_NO_ERROR;
}


/* Implements svn_diff_fns2_t::datasource_close */
static svn_error_t *
datasource_close(void *baton, svn_diff_datasource_e datasource)
{
  /* Do nothing.  The compare_token function needs previous datasources
   * to stay available until all datasources are processed.
   */

  return SVN_NO_ERROR;
}

/* Implements svn_diff_fns2_t::datasource_get_next_token.
 *
 * Read the next line-based token from DATASOURCE.  On success set *TOKEN to
 * the token and *HASH to its adler32 checksum over the normalized content.
 * *TOKEN is left NULL at EOF, and also when the previously detected
 * identical suffix is reached (see datasources_open). */
static svn_error_t *
datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
                          svn_diff_datasource_e datasource)
{
  svn_diff__file_baton_t *file_baton = baton;
  svn_diff__file_token_t *file_token;
  struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
  char *endp;
  char *curp;
  char *eol;
  apr_off_t last_chunk;
  apr_off_t length;
  apr_uint32_t h = 0;
  /* Did the last chunk end in a CR character? */
  svn_boolean_t had_cr = FALSE;

  *token = NULL;

  curp = file->curp;
  endp = file->endp;

  last_chunk = offset_to_chunk(file->size);

  /* Are we already at the end of a chunk? */
  if (curp == endp)
    {
      /* Are we at EOF */
      if (last_chunk == file->chunk)
        return SVN_NO_ERROR; /* EOF */

      /* Or right before an identical suffix in the next chunk? */
      if (file->chunk + 1 == file->suffix_start_chunk
          && file->suffix_offset_in_chunk == 0)
        return SVN_NO_ERROR;
    }

  /* Stop when we encounter the identical suffix.  If suffix scanning was not
   * performed, suffix_start_chunk will be -1, so this condition will never
   * be true. */
  if (file->chunk == file->suffix_start_chunk
      && (curp - file->buffer) == file->suffix_offset_in_chunk)
    return SVN_NO_ERROR;

  /* Allocate a new token, or fetch one from the "reusable tokens" list. */
  file_token = file_baton->tokens;
  if (file_token)
    {
      file_baton->tokens = file_token->next;
    }
  else
    {
      file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
    }

  file_token->datasource = datasource;
  file_token->offset = chunk_to_offset(file->chunk)
                       + (curp - file->buffer);
  file_token->norm_offset = file_token->offset;
  file_token->raw_length = 0;
  file_token->length = 0;

  while (1)
    {
      eol = svn_eol__find_eol_start(curp, endp - curp);
      if (eol)
        {
          had_cr = (*eol == '\r');
          eol++;
          /* If we have the whole eol sequence in the chunk... */
          if (!(had_cr && eol == endp))
            {
              /* Also skip past the '\n' in an '\r\n' sequence. */
              if (had_cr && *eol == '\n')
                eol++;
              break;
            }
        }

      if (file->chunk == last_chunk)
        {
          eol = endp;
          break;
        }

      /* The token continues into the next chunk: fold the part we have into
       * the running hash/length, then read the next chunk. */
      length = endp - curp;
      file_token->raw_length += length;
      {
        char *c = curp;

        svn_diff__normalize_buffer(&c, &length,
                                   &file->normalize_state,
                                   curp, file_baton->options);
        if (file_token->length == 0)
          {
            /* When we are reading the first part of the token, move the
               normalized offset past leading ignored characters, if any. */
            file_token->norm_offset += (c - curp);
          }
        file_token->length += length;
        h = svn__adler32(h, c, length);
      }

      curp = endp = file->buffer;
      file->chunk++;
      length = file->chunk == last_chunk ?
        offset_in_chunk(file->size) : CHUNK_SIZE;
      endp += length;
      file->endp = endp;

      /* Issue #4283: Normally we should have checked for reaching the skipped
         suffix here, but because we assume that a suffix always starts on a
         line and token boundary we rely on catching the suffix earlier in this
         function.

         When changing things here, make sure the whitespace settings are
         applied, or we might not reach the exact suffix boundary as token
         boundary. */
      SVN_ERR(read_chunk(file->file, file->path,
                         curp, length,
                         chunk_to_offset(file->chunk),
                         file_baton->pool));

      /* If the last chunk ended in a CR, we're done. */
      if (had_cr)
        {
          eol = curp;
          if (*curp == '\n')
            ++eol;
          break;
        }
    }

  length = eol - curp;
  file_token->raw_length += length;
  file->curp = eol;

  /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
   * with a spurious empty token.  Avoid returning it.
   * Note that we use the unnormalized length; we don't want a line containing
   * only spaces (and no trailing newline) to appear like a non-existent
   * line. */
  if (file_token->raw_length > 0)
    {
      char *c = curp;
      svn_diff__normalize_buffer(&c, &length,
                                 &file->normalize_state,
                                 curp, file_baton->options);
      if (file_token->length == 0)
        {
          /* When we are reading the first part of the token, move the
             normalized offset past leading ignored characters, if any. */
          file_token->norm_offset += (c - curp);
        }

      file_token->length += length;

      *hash = svn__adler32(h, c, length);
      *token = file_token;
    }

  return SVN_NO_ERROR;
}

#define COMPARE_CHUNK_SIZE 4096

/* Implements svn_diff_fns2_t::token_compare.
 *
 * Compare the normalized contents of TOKEN1 and TOKEN2, setting *COMPARE to
 * -1, 0 or +1 (memcmp-style).  Tokens whose normalized lengths differ are
 * ordered by length alone; equal-length tokens are compared content-wise,
 * re-reading and re-normalizing from disk in COMPARE_CHUNK_SIZE pieces when
 * a token is no longer in the in-memory chunk. */
static svn_error_t *
token_compare(void *baton, void *token1, void *token2, int *compare)
{
  svn_diff__file_baton_t *file_baton = baton;
  svn_diff__file_token_t *file_token[2];
  char buffer[2][COMPARE_CHUNK_SIZE];
  char *bufp[2];
  apr_off_t offset[2];
  struct file_info *file[2];
  apr_off_t length[2];
  apr_off_t total_length;
  /* How much is left to read of each token from the file. */
  apr_off_t raw_length[2];
  int i;
  svn_diff__normalize_state_t state[2];

  file_token[0] = token1;
  file_token[1] = token2;
  if (file_token[0]->length < file_token[1]->length)
    {
      *compare = -1;
      return SVN_NO_ERROR;
    }

  if (file_token[0]->length > file_token[1]->length)
    {
      *compare = 1;
      return SVN_NO_ERROR;
    }

  total_length = file_token[0]->length;
  if (total_length == 0)
    {
      *compare = 0;
      return SVN_NO_ERROR;
    }

  for (i = 0; i < 2; ++i)
    {
      int idx = datasource_to_index(file_token[i]->datasource);

      file[i] = &file_baton->files[idx];
      offset[i] = file_token[i]->norm_offset;
      state[i] = svn_diff__normalize_state_normal;

      if (offset_to_chunk(offset[i]) == file[i]->chunk)
        {
          /* If the start of the token is in memory, the entire token is
           * in memory.
           */
          bufp[i] = file[i]->buffer;
          bufp[i] += offset_in_chunk(offset[i]);

          length[i] = total_length;
          raw_length[i] = 0;
        }
      else
        {
          apr_off_t skipped;

          length[i] = 0;

          /* When we skipped the first part of the token via the whitespace
             normalization we must reduce the raw length of the token */
          skipped = (file_token[i]->norm_offset - file_token[i]->offset);

          raw_length[i] = file_token[i]->raw_length - skipped;
        }
    }

  do
    {
      apr_off_t len;
      for (i = 0; i < 2; i++)
        {
          if (length[i] == 0)
            {
              /* Error if raw_length is 0, that's an unexpected change
               * of the file that can happen when ignoring whitespace
               * and that can lead to an infinite loop. */
              if (raw_length[i] == 0)
                return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
                                         NULL,
                                         _("The file '%s' changed unexpectedly"
                                           " during diff"),
                                         file[i]->path);

              /* Read a chunk from disk into a buffer */
              bufp[i] = buffer[i];
              length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
                COMPARE_CHUNK_SIZE : raw_length[i];

              SVN_ERR(read_chunk(file[i]->file,
                                 file[i]->path,
                                 bufp[i], length[i], offset[i],
                                 file_baton->pool));
              offset[i] += length[i];
              raw_length[i] -= length[i];
              /* bufp[i] gets reset to buffer[i] before reading each chunk,
                 so, overwriting it isn't a problem */
              svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
                                         bufp[i], file_baton->options);

              /* assert(length[i] == file_token[i]->length); */
            }
        }

      len = length[0] > length[1] ? length[1] : length[0];

      /* Compare two chunks (that could be entire tokens if they both reside
       * in memory).
       */
      *compare = memcmp(bufp[0], bufp[1], (size_t) len);
      if (*compare != 0)
        return SVN_NO_ERROR;

      total_length -= len;
      length[0] -= len;
      length[1] -= len;
      bufp[0] += len;
      bufp[1] += len;
    }
  while(total_length > 0);

  *compare = 0;
  return SVN_NO_ERROR;
}


/* Implements svn_diff_fns2_t::token_discard */
static void
token_discard(void *baton, void *token)
{
  svn_diff__file_baton_t *file_baton = baton;
  svn_diff__file_token_t *file_token = token;

  /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
  file_token->next = file_baton->tokens;
  file_baton->tokens = file_token;
}


/* Implements svn_diff_fns2_t::token_discard_all */
static void
token_discard_all(void *baton)
{
  svn_diff__file_baton_t *file_baton = baton;

  /* Discard all memory in use by the tokens, and close all open files. */
  svn_pool_clear(file_baton->pool);
}


/* The vtable tying the above callbacks together for file-based diffs. */
static const svn_diff_fns2_t svn_diff__file_vtable =
{
  datasources_open,
  datasource_close,
  datasource_get_next_token,
  token_compare,
  token_discard,
  token_discard_all
};

/* Id for the --ignore-eol-style option, which doesn't have a short name. */
#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256

/* Options supported by svn_diff_file_options_parse().
*/
static const apr_getopt_option_t diff_options[] =
{
  { "ignore-space-change", 'b', 0, NULL },
  { "ignore-all-space", 'w', 0, NULL },
  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
  { "show-c-function", 'p', 0, NULL },
  /* ### For compatibility; we don't support the argument to -u, because
   * ### we don't have optional argument support. */
  { "unified", 'u', 0, NULL },
  { NULL, 0, 0, NULL }
};

/* Return a new, zero-initialized options structure (all options disabled),
 * allocated in POOL. */
svn_diff_file_options_t *
svn_diff_file_options_create(apr_pool_t *pool)
{
  return apr_pcalloc(pool, sizeof(svn_diff_file_options_t));
}

/* A baton for use with opt_parsing_error_func(). */
struct opt_parsing_error_baton_t
{
  svn_error_t *err;
  apr_pool_t *pool;
};

/* Store an error message from apr_getopt_long().  Set BATON->err to a new
 * error with a message generated from FMT and the remaining arguments.
 * Implements apr_getopt_err_fn_t. */
static void
opt_parsing_error_func(void *baton,
                       const char *fmt, ...)
{
  struct opt_parsing_error_baton_t *b = baton;
  const char *message;
  va_list ap;

  va_start(ap, fmt);
  message = apr_pvsprintf(b->pool, fmt, ap);
  va_end(ap);

  /* Skip leading ": " (if present, which it always is in known cases). */
  if (strncmp(message, ": ", 2) == 0)
    message += 2;

  b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
}

/* Parse the GNU-diff-style arguments in ARGS into OPTIONS, using
 * apr_getopt_long() with the diff_options table above.  Returns an
 * SVN_ERR_INVALID_DIFF_OPTION error for unknown or spurious arguments. */
svn_error_t *
svn_diff_file_options_parse(svn_diff_file_options_t *options,
                            const apr_array_header_t *args,
                            apr_pool_t *pool)
{
  apr_getopt_t *os;
  struct opt_parsing_error_baton_t opt_parsing_error_baton;
  /* Make room for each option (starting at index 1) plus trailing NULL. */
  const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));

  opt_parsing_error_baton.err = NULL;
  opt_parsing_error_baton.pool = pool;

  /* argv[0] is the (dummy) program name expected by getopt. */
  argv[0] = "";
  memcpy((void *) (argv + 1), args->elts, sizeof(char*) * args->nelts);
  argv[args->nelts + 1] = NULL;

  apr_getopt_init(&os, pool, args->nelts + 1, argv);

  /* Capture any error message from apr_getopt_long().  This will typically
   * say which option is wrong, which we would not otherwise know. */
  os->errfn = opt_parsing_error_func;
  os->errarg = &opt_parsing_error_baton;

  while (1)
    {
      const char *opt_arg;
      int opt_id;
      apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);

      if (APR_STATUS_IS_EOF(err))
        break;
      if (err)
        /* Wrap apr_getopt_long()'s error message.  Its doc string implies
         * it always will produce one, but never mind if it doesn't.  Avoid
         * using the message associated with the return code ERR, because
         * it refers to the "command line" which may be misleading here. */
        return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
                                opt_parsing_error_baton.err,
                                _("Error in options to internal diff"));

      switch (opt_id)
        {
        case 'b':
          /* -w takes precedence over -b. */
          if (! options->ignore_space)
            options->ignore_space = svn_diff_file_ignore_space_change;
          break;
        case 'w':
          options->ignore_space = svn_diff_file_ignore_space_all;
          break;
        case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
          options->ignore_eol_style = TRUE;
          break;
        case 'p':
          options->show_c_function = TRUE;
          break;
        default:
          break;
        }
    }

  /* Check for spurious arguments. */
  if (os->ind < os->argc)
    return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
                             _("Invalid argument '%s' in diff options"),
                             os->argv[os->ind]);

  return SVN_NO_ERROR;
}

/* Two-way diff of the files ORIGINAL and MODIFIED, driven through the
 * file-based vtable.  File buffers live in a temporary subpool of POOL
 * which is destroyed before returning successfully. */
svn_error_t *
svn_diff_file_diff_2(svn_diff_t **diff,
                     const char *original,
                     const char *modified,
                     const svn_diff_file_options_t *options,
                     apr_pool_t *pool)
{
  svn_diff__file_baton_t baton = { 0 };

  baton.options = options;
  baton.files[0].path = original;
  baton.files[1].path = modified;
  baton.pool = svn_pool_create(pool);

  SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));

  svn_pool_destroy(baton.pool);
  return SVN_NO_ERROR;
}

/* Three-way diff of ORIGINAL, MODIFIED and LATEST; see svn_diff_file_diff_2
 * for the pool handling. */
svn_error_t *
svn_diff_file_diff3_2(svn_diff_t **diff,
                      const char *original,
                      const char *modified,
                      const char *latest,
                      const svn_diff_file_options_t *options,
                      apr_pool_t *pool)
{
  svn_diff__file_baton_t baton = { 0 };

  baton.options = options;
  baton.files[0].path = original;
  baton.files[1].path = modified;
  baton.files[2].path = latest;
  baton.pool = svn_pool_create(pool);

  SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));

  svn_pool_destroy(baton.pool);
  return SVN_NO_ERROR;
}

/* Four-way diff of ORIGINAL, MODIFIED, LATEST and ANCESTOR; see
 * svn_diff_file_diff_2 for the pool handling. */
svn_error_t *
svn_diff_file_diff4_2(svn_diff_t **diff,
                      const char *original,
                      const char *modified,
                      const char *latest,
                      const char *ancestor,
                      const svn_diff_file_options_t *options,
                      apr_pool_t *pool)
{
  svn_diff__file_baton_t baton = { 0 };

  baton.options = options;
  baton.files[0].path = original;
  baton.files[1].path = modified;
  baton.files[2].path = latest;
  baton.files[3].path = ancestor;
  baton.pool = svn_pool_create(pool);

  SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));

  svn_pool_destroy(baton.pool);
  return SVN_NO_ERROR;
}


/** Display unified context diffs **/

/* Maximum length of the extra context to show when show_c_function is set.
 * GNU diff uses 40, let's be brave and use 50 instead.
*/
#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
typedef struct svn_diff__file_output_baton_t
{
  svn_stream_t *output_stream;
  const char *header_encoding;

  /* Cached markers, in header_encoding. */
  const char *context_str;
  const char *delete_str;
  const char *insert_str;

  const char *path[2];
  apr_file_t *file[2];

  apr_off_t current_line[2];

  char buffer[2][4096];
  apr_size_t length[2];
  char *curp[2];

  apr_off_t hunk_start[2];
  apr_off_t hunk_length[2];
  svn_stringbuf_t *hunk;

  /* Should we emit C functions in the unified diff header */
  svn_boolean_t show_c_function;
  /* Extra strings to skip over if we match. */
  apr_array_header_t *extra_skip_match;
  /* "Context" to append to the @@ line when the show_c_function option
   * is set. */
  svn_stringbuf_t *extra_context;
  /* Extra context for the current hunk. */
  char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];

  apr_pool_t *pool;
} svn_diff__file_output_baton_t;

/* How a single line is consumed by output_unified_line(): skipped entirely,
 * or appended to the hunk as context (' '), deletion ('-') or insertion
 * ('+'). */
typedef enum svn_diff__file_output_unified_type_e
{
  svn_diff__file_output_unified_skip,
  svn_diff__file_output_unified_context,
  svn_diff__file_output_unified_delete,
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;


/* Consume one line from BATON->file[IDX], refilling BATON->buffer[IDX] from
 * the file as needed and handling CR/LF/CRLF line endings that may straddle
 * a buffer boundary.  Depending on TYPE the line is appended to BATON->hunk
 * with the matching marker, or skipped.  When show_c_function is enabled,
 * candidate function-context text is collected into BATON->extra_context.
 * Always advances BATON->current_line[IDX], even at EOF. */
static svn_error_t *
output_unified_line(svn_diff__file_output_baton_t *baton,
                    svn_diff__file_output_unified_type_e type, int idx)
{
  char *curp;
  char *eol;
  apr_size_t length;
  svn_error_t *err;
  svn_boolean_t bytes_processed = FALSE;
  svn_boolean_t had_cr = FALSE;
  /* Are we collecting extra context? */
  svn_boolean_t collect_extra = FALSE;

  length = baton->length[idx];
  curp = baton->curp[idx];

  /* Lazily update the current line even if we're at EOF.
   * This way we fake output of context at EOF
   */
  baton->current_line[idx]++;

  if (length == 0 && apr_file_eof(baton->file[idx]))
    {
      return SVN_NO_ERROR;
    }

  do
    {
      if (length > 0)
        {
          if (!bytes_processed)
            {
              /* First bytes of the line: emit the hunk marker and account
               * the line against the hunk lengths. */
              switch (type)
                {
                case svn_diff__file_output_unified_context:
                  svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
                  baton->hunk_length[0]++;
                  baton->hunk_length[1]++;
                  break;
                case svn_diff__file_output_unified_delete:
                  svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
                  baton->hunk_length[0]++;
                  break;
                case svn_diff__file_output_unified_insert:
                  svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
                  baton->hunk_length[1]++;
                  break;
                default:
                  break;
                }

              if (baton->show_c_function
                  && (type == svn_diff__file_output_unified_skip
                      || type == svn_diff__file_output_unified_context)
                  && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
                  && !svn_cstring_match_glob_list(curp,
                                                  baton->extra_skip_match))
                {
                  svn_stringbuf_setempty(baton->extra_context);
                  collect_extra = TRUE;
                }
            }

          eol = svn_eol__find_eol_start(curp, length);

          if (eol != NULL)
            {
              apr_size_t len;

              had_cr = (*eol == '\r');
              eol++;
              len = (apr_size_t)(eol - curp);

              /* Unless the buffer ends on a bare CR (the LF might follow in
               * the next read), we have the whole line: flush and return. */
              if (! had_cr || len < length)
                {
                  if (had_cr && *eol == '\n')
                    {
                      ++eol;
                      ++len;
                    }

                  length -= len;

                  if (type != svn_diff__file_output_unified_skip)
                    {
                      svn_stringbuf_appendbytes(baton->hunk, curp, len);
                    }
                  if (collect_extra)
                    {
                      svn_stringbuf_appendbytes(baton->extra_context,
                                                curp, len);
                    }

                  baton->curp[idx] = eol;
                  baton->length[idx] = length;

                  err = SVN_NO_ERROR;

                  break;
                }
            }

          if (type != svn_diff__file_output_unified_skip)
            {
              svn_stringbuf_appendbytes(baton->hunk, curp, length);
            }

          if (collect_extra)
            {
              svn_stringbuf_appendbytes(baton->extra_context, curp, length);
            }

          bytes_processed = TRUE;
        }

      curp = baton->buffer[idx];
      length = sizeof(baton->buffer[idx]);

      err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);

      /* If the last chunk ended with a CR, we look for an LF at the start
         of this chunk. */
      if (had_cr)
        {
          if (! err && length > 0 && *curp == '\n')
            {
              if (type != svn_diff__file_output_unified_skip)
                {
                  svn_stringbuf_appendbyte(baton->hunk, *curp);
                }
              /* We don't append the LF to extra_context, since it would
               * just be stripped anyway. */
              ++curp;
              --length;
            }

          baton->curp[idx] = curp;
          baton->length[idx] = length;

          break;
        }
    }
  while (! err);

  if (err && ! APR_STATUS_IS_EOF(err->apr_err))
    return err;

  if (err && APR_STATUS_IS_EOF(err->apr_err))
    {
      svn_error_clear(err);
      /* Special case if we reach the end of file AND the last line is in the
         changed range AND the file doesn't end with a newline */
      if (bytes_processed && (type != svn_diff__file_output_unified_skip)
          && ! had_cr)
        {
          SVN_ERR(svn_diff__unified_append_no_newline_msg(
                    baton->hunk, baton->header_encoding, baton->pool));
        }

      baton->length[idx] = 0;
    }

  return SVN_NO_ERROR;
}

/* Feed lines from file SOURCE (0 = original, 1 = modified) through
 * output_unified_line() with TYPE until current_line[SOURCE] reaches
 * UNTIL. */
static APR_INLINE svn_error_t *
output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
                          int source,
                          svn_diff__file_output_unified_type_e type,
                          apr_off_t until)
{
  while (output_baton->current_line[source] < until)
    {
      SVN_ERR(output_unified_line(output_baton, type, source));
    }
  return SVN_NO_ERROR;
}

/* Write the accumulated hunk (if any) to BATON->output_stream: append the
 * trailing context lines, emit the "@@ -a,b +c,d @@" header and the hunk
 * body, then reset the hunk state for the next hunk. */
static svn_error_t *
output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
{
  apr_off_t target_line;
  apr_size_t hunk_len;
  apr_off_t old_start;
  apr_off_t new_start;

  if (svn_stringbuf_isempty(baton->hunk))
    {
      /* Nothing to flush */
      return SVN_NO_ERROR;
    }

  target_line = baton->hunk_start[0] + baton->hunk_length[0]
                + SVN_DIFF__UNIFIED_CONTEXT_SIZE;

  /* Add trailing context to the hunk */
  SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
                                    svn_diff__file_output_unified_context,
                                    target_line));

  old_start = baton->hunk_start[0];
  new_start = baton->hunk_start[1];

  /* If the file is non-empty, convert the line indexes from
     zero based to one based */
  if (baton->hunk_length[0])
    old_start++;
  if (baton->hunk_length[1])
    new_start++;

  /* Write the hunk header */
  SVN_ERR(svn_diff__unified_write_hunk_header(
            baton->output_stream, baton->header_encoding, "@@",
            old_start, baton->hunk_length[0],
            new_start, baton->hunk_length[1],
            baton->hunk_extra_context,
            baton->pool));

  /* Output the hunk content */
  hunk_len = baton->hunk->len;
  SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
                           &hunk_len));

  /* Prepare for the next hunk */
  baton->hunk_length[0] = 0;
  baton->hunk_length[1] = 0;
  baton->hunk_start[0] = 0;
  baton->hunk_start[1] = 0;
  svn_stringbuf_setempty(baton->hunk);

  return SVN_NO_ERROR;
}

/* Implements the output_diff_modified slot of svn_diff_output_fns_t (see
 * svn_diff__file_output_unified_vtable below).  LATEST_START and
 * LATEST_LENGTH are not used by this callback. */
static svn_error_t *
output_unified_diff_modified(void *baton,
  apr_off_t original_start, apr_off_t original_length,
  apr_off_t modified_start, apr_off_t modified_length,
  apr_off_t latest_start, apr_off_t latest_length)
{
  svn_diff__file_output_baton_t *output_baton = baton;
  apr_off_t context_prefix_length;
  apr_off_t prev_context_end;
  svn_boolean_t init_hunk = FALSE;

  if (original_start > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
    context_prefix_length = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
  else
    context_prefix_length = original_start;

  /* Calculate where the previous hunk will end if we would write it now
     (including the necessary context at the end) */
  if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
    {
      prev_context_end = output_baton->hunk_start[0]
                         + output_baton->hunk_length[0]
                         + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
    }
  else
    {
      prev_context_end = -1;

      if (output_baton->hunk_start[0] == 0
          && (original_length > 0 || modified_length > 0))
        init_hunk = TRUE;
    }

  /* If the changed range is far enough from the previous range, flush the current
     hunk. */
  {
    apr_off_t new_hunk_start = (original_start - context_prefix_length);

    if (output_baton->current_line[0] < new_hunk_start
        && prev_context_end <= new_hunk_start)
      {
        SVN_ERR(output_unified_flush_hunk(output_baton));
        init_hunk = TRUE;
      }
    else if (output_baton->hunk_length[0] > 0
             || output_baton->hunk_length[1] > 0)
      {
        /* We extend the current hunk */


        /* Original: Output the context preceding the changed range */
        SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                          svn_diff__file_output_unified_context,
                                          original_start));
      }
  }

  /* Original: Skip lines until we are at the beginning of the context we want
     to display */
  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                    svn_diff__file_output_unified_skip,
                                    original_start - context_prefix_length));

  /* Note that the above skip stores data for the show_c_function support below */

  if (init_hunk)
    {
      SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
                     && output_baton->hunk_length[1] == 0);

      output_baton->hunk_start[0] = original_start - context_prefix_length;
      output_baton->hunk_start[1] = modified_start - context_prefix_length;
    }

  if (init_hunk && output_baton->show_c_function)
    {
      apr_size_t p;
      const char *invalid_character;

      /* Save the extra context for later use.
       * Note that the last byte of the hunk_extra_context array is never
       * touched after it is zero-initialized, so the array is always
       * 0-terminated. */
      strncpy(output_baton->hunk_extra_context,
              output_baton->extra_context->data,
              SVN_DIFF__EXTRA_CONTEXT_LENGTH);
      /* Trim whitespace at the end, most notably to get rid of any
       * newline characters. */
      p = strlen(output_baton->hunk_extra_context);
      while (p > 0
             && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
        {
          output_baton->hunk_extra_context[--p] = '\0';
        }
      /* Truncate at the last valid UTF-8 boundary so we never emit a
       * partial multi-byte sequence. */
      invalid_character =
        svn_utf__last_valid(output_baton->hunk_extra_context,
                            SVN_DIFF__EXTRA_CONTEXT_LENGTH);
      for (p = invalid_character - output_baton->hunk_extra_context;
           p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
        {
          output_baton->hunk_extra_context[p] = '\0';
        }
    }

  /* Modified: Skip lines until we are at the start of the changed range */
  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
                                    svn_diff__file_output_unified_skip,
                                    modified_start));

  /* Original: Output the context preceding the changed range */
  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                    svn_diff__file_output_unified_context,
                                    original_start));

  /* Both: Output the changed range */
  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                    svn_diff__file_output_unified_delete,
                                    original_start + original_length));
  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
                                    svn_diff__file_output_unified_insert,
                                    modified_start + modified_length));

  return SVN_NO_ERROR;
}

/* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime.
*/ +static svn_error_t * +output_unified_default_hdr(const char **header, const char *path, + apr_pool_t *pool) +{ + apr_finfo_t file_info; + apr_time_exp_t exploded_time; + char time_buffer[64]; + apr_size_t time_len; + const char *utf8_timestr; + + SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool)); + apr_time_exp_lt(&exploded_time, file_info.mtime); + + apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1, + /* Order of date components can be different in different languages */ + _("%a %b %e %H:%M:%S %Y"), &exploded_time); + + SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool)); + + *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr); + + return SVN_NO_ERROR; +} + +static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable = +{ + NULL, /* output_common */ + output_unified_diff_modified, + NULL, /* output_diff_latest */ + NULL, /* output_diff_common */ + NULL /* output_conflict */ +}; + +svn_error_t * +svn_diff_file_output_unified3(svn_stream_t *output_stream, + svn_diff_t *diff, + const char *original_path, + const char *modified_path, + const char *original_header, + const char *modified_header, + const char *header_encoding, + const char *relative_to_dir, + svn_boolean_t show_c_function, + apr_pool_t *pool) +{ + if (svn_diff_contains_diffs(diff)) + { + svn_diff__file_output_baton_t baton; + int i; + + memset(&baton, 0, sizeof(baton)); + baton.output_stream = output_stream; + baton.pool = pool; + baton.header_encoding = header_encoding; + baton.path[0] = original_path; + baton.path[1] = modified_path; + baton.hunk = svn_stringbuf_create_empty(pool); + baton.show_c_function = show_c_function; + baton.extra_context = svn_stringbuf_create_empty(pool); + + if (show_c_function) + { + baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **)); + + APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*"; + APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*"; + 
APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*"; + } + + SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ", + header_encoding, pool)); + SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-", + header_encoding, pool)); + SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+", + header_encoding, pool)); + + if (relative_to_dir) + { + /* Possibly adjust the "original" and "modified" paths shown in + the output (see issue #2723). */ + const char *child_path; + + if (! original_header) + { + child_path = svn_dirent_is_child(relative_to_dir, + original_path, pool); + if (child_path) + original_path = child_path; + else + return svn_error_createf( + SVN_ERR_BAD_RELATIVE_PATH, NULL, + _("Path '%s' must be inside " + "the directory '%s'"), + svn_dirent_local_style(original_path, pool), + svn_dirent_local_style(relative_to_dir, + pool)); + } + + if (! modified_header) + { + child_path = svn_dirent_is_child(relative_to_dir, + modified_path, pool); + if (child_path) + modified_path = child_path; + else + return svn_error_createf( + SVN_ERR_BAD_RELATIVE_PATH, NULL, + _("Path '%s' must be inside " + "the directory '%s'"), + svn_dirent_local_style(modified_path, pool), + svn_dirent_local_style(relative_to_dir, + pool)); + } + } + + for (i = 0; i < 2; i++) + { + SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i], + APR_READ, APR_OS_DEFAULT, pool)); + } + + if (original_header == NULL) + { + SVN_ERR(output_unified_default_hdr(&original_header, original_path, + pool)); + } + + if (modified_header == NULL) + { + SVN_ERR(output_unified_default_hdr(&modified_header, modified_path, + pool)); + } + + SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding, + original_header, modified_header, + pool)); + + SVN_ERR(svn_diff_output(diff, &baton, + &svn_diff__file_output_unified_vtable)); + SVN_ERR(output_unified_flush_hunk(&baton)); + + for (i = 0; i < 2; i++) + { + SVN_ERR(svn_io_file_close(baton.file[i], pool)); + } + } + + 
return SVN_NO_ERROR; +} + + +/** Display diff3 **/ + +/* A stream to remember *leading* context. Note that this stream does + *not* copy the data that it is remembering; it just saves + *pointers! */ +typedef struct context_saver_t { + svn_stream_t *stream; + const char *data[SVN_DIFF__UNIFIED_CONTEXT_SIZE]; + apr_size_t len[SVN_DIFF__UNIFIED_CONTEXT_SIZE]; + apr_size_t next_slot; + apr_size_t total_written; +} context_saver_t; + + +static svn_error_t * +context_saver_stream_write(void *baton, + const char *data, + apr_size_t *len) +{ + context_saver_t *cs = baton; + cs->data[cs->next_slot] = data; + cs->len[cs->next_slot] = *len; + cs->next_slot = (cs->next_slot + 1) % SVN_DIFF__UNIFIED_CONTEXT_SIZE; + cs->total_written++; + return SVN_NO_ERROR; +} + +typedef struct svn_diff3__file_output_baton_t +{ + svn_stream_t *output_stream; + + const char *path[3]; + + apr_off_t current_line[3]; + + char *buffer[3]; + char *endp[3]; + char *curp[3]; + + /* The following four members are in the encoding used for the output. */ + const char *conflict_modified; + const char *conflict_original; + const char *conflict_separator; + const char *conflict_latest; + + const char *marker_eol; + + svn_diff_conflict_display_style_t conflict_style; + + /* The rest of the fields are for + svn_diff_conflict_display_only_conflicts only. Note that for + these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or + (soon after a conflict) a "trailing context stream", never the + actual output stream.*/ + /* The actual output stream. */ + svn_stream_t *real_output_stream; + context_saver_t *context_saver; + /* Used to allocate context_saver and trailing context streams, and + for some printfs. 
*/ + apr_pool_t *pool; +} svn_diff3__file_output_baton_t; + +static svn_error_t * +flush_context_saver(context_saver_t *cs, + svn_stream_t *output_stream) +{ + int i; + for (i = 0; i < SVN_DIFF__UNIFIED_CONTEXT_SIZE; i++) + { + apr_size_t slot = (i + cs->next_slot) % SVN_DIFF__UNIFIED_CONTEXT_SIZE; + if (cs->data[slot]) + { + apr_size_t len = cs->len[slot]; + SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len)); + } + } + return SVN_NO_ERROR; +} + +static void +make_context_saver(svn_diff3__file_output_baton_t *fob) +{ + context_saver_t *cs; + + svn_pool_clear(fob->pool); + cs = apr_pcalloc(fob->pool, sizeof(*cs)); + cs->stream = svn_stream_empty(fob->pool); + svn_stream_set_baton(cs->stream, cs); + svn_stream_set_write(cs->stream, context_saver_stream_write); + fob->context_saver = cs; + fob->output_stream = cs->stream; +} + + +/* A stream which prints SVN_DIFF__UNIFIED_CONTEXT_SIZE lines to + BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to + a context_saver; used for *trailing* context. 
*/ + +struct trailing_context_printer { + apr_size_t lines_to_print; + svn_diff3__file_output_baton_t *fob; +}; + + + +static svn_error_t * +trailing_context_printer_write(void *baton, + const char *data, + apr_size_t *len) +{ + struct trailing_context_printer *tcp = baton; + SVN_ERR_ASSERT(tcp->lines_to_print > 0); + SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len)); + tcp->lines_to_print--; + if (tcp->lines_to_print == 0) + make_context_saver(tcp->fob); + return SVN_NO_ERROR; +} + + +static void +make_trailing_context_printer(svn_diff3__file_output_baton_t *btn) +{ + struct trailing_context_printer *tcp; + svn_stream_t *s; + + svn_pool_clear(btn->pool); + + tcp = apr_pcalloc(btn->pool, sizeof(*tcp)); + tcp->lines_to_print = SVN_DIFF__UNIFIED_CONTEXT_SIZE; + tcp->fob = btn; + s = svn_stream_empty(btn->pool); + svn_stream_set_baton(s, tcp); + svn_stream_set_write(s, trailing_context_printer_write); + btn->output_stream = s; +} + + + +typedef enum svn_diff3__file_output_type_e +{ + svn_diff3__file_output_skip, + svn_diff3__file_output_normal +} svn_diff3__file_output_type_e; + + +static svn_error_t * +output_line(svn_diff3__file_output_baton_t *baton, + svn_diff3__file_output_type_e type, int idx) +{ + char *curp; + char *endp; + char *eol; + apr_size_t len; + + curp = baton->curp[idx]; + endp = baton->endp[idx]; + + /* Lazily update the current line even if we're at EOF. + */ + baton->current_line[idx]++; + + if (curp == endp) + return SVN_NO_ERROR; + + eol = svn_eol__find_eol_start(curp, endp - curp); + if (!eol) + eol = endp; + else + { + svn_boolean_t had_cr = (*eol == '\r'); + eol++; + if (had_cr && eol != endp && *eol == '\n') + eol++; + } + + if (type != svn_diff3__file_output_skip) + { + len = eol - curp; + /* Note that the trailing context printer assumes that + svn_stream_write is called exactly once per line. 
*/ + SVN_ERR(svn_stream_write(baton->output_stream, curp, &len)); + } + + baton->curp[idx] = eol; + + return SVN_NO_ERROR; +} + +static svn_error_t * +output_marker_eol(svn_diff3__file_output_baton_t *btn) +{ + return svn_stream_puts(btn->output_stream, btn->marker_eol); +} + +static svn_error_t * +output_hunk(void *baton, int idx, apr_off_t target_line, + apr_off_t target_length) +{ + svn_diff3__file_output_baton_t *output_baton = baton; + + /* Skip lines until we are at the start of the changed range */ + while (output_baton->current_line[idx] < target_line) + { + SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx)); + } + + target_line += target_length; + + while (output_baton->current_line[idx] < target_line) + { + SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx)); + } + + return SVN_NO_ERROR; +} + +static svn_error_t * +output_common(void *baton, apr_off_t original_start, apr_off_t original_length, + apr_off_t modified_start, apr_off_t modified_length, + apr_off_t latest_start, apr_off_t latest_length) +{ + return output_hunk(baton, 1, modified_start, modified_length); +} + +static svn_error_t * +output_diff_modified(void *baton, + apr_off_t original_start, apr_off_t original_length, + apr_off_t modified_start, apr_off_t modified_length, + apr_off_t latest_start, apr_off_t latest_length) +{ + return output_hunk(baton, 1, modified_start, modified_length); +} + +static svn_error_t * +output_diff_latest(void *baton, + apr_off_t original_start, apr_off_t original_length, + apr_off_t modified_start, apr_off_t modified_length, + apr_off_t latest_start, apr_off_t latest_length) +{ + return output_hunk(baton, 2, latest_start, latest_length); +} + +static svn_error_t * +output_conflict(void *baton, + apr_off_t original_start, apr_off_t original_length, + apr_off_t modified_start, apr_off_t modified_length, + apr_off_t latest_start, apr_off_t latest_length, + svn_diff_t *diff); + +static const svn_diff_output_fns_t 
svn_diff3__file_output_vtable = +{ + output_common, + output_diff_modified, + output_diff_latest, + output_diff_modified, /* output_diff_common */ + output_conflict +}; + + + +static svn_error_t * +output_conflict_with_context(svn_diff3__file_output_baton_t *btn, + apr_off_t original_start, + apr_off_t original_length, + apr_off_t modified_start, + apr_off_t modified_length, + apr_off_t latest_start, + apr_off_t latest_length) +{ + /* Are we currently saving starting context (as opposed to printing + trailing context)? If so, flush it. */ + if (btn->output_stream == btn->context_saver->stream) + { + if (btn->context_saver->total_written > SVN_DIFF__UNIFIED_CONTEXT_SIZE) + SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n")); + SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream)); + } + + /* Print to the real output stream. */ + btn->output_stream = btn->real_output_stream; + + /* Output the conflict itself. */ + SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, + (modified_length == 1 + ? "%s (%" APR_OFF_T_FMT ")" + : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"), + btn->conflict_modified, + modified_start + 1, modified_length)); + SVN_ERR(output_marker_eol(btn)); + SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length)); + + SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, + (original_length == 1 + ? "%s (%" APR_OFF_T_FMT ")" + : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"), + btn->conflict_original, + original_start + 1, original_length)); + SVN_ERR(output_marker_eol(btn)); + SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length)); + + SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, + "%s%s", btn->conflict_separator, btn->marker_eol)); + SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length)); + SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, + (latest_length == 1 + ? 
"%s (%" APR_OFF_T_FMT ")" + : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"), + btn->conflict_latest, + latest_start + 1, latest_length)); + SVN_ERR(output_marker_eol(btn)); + + /* Go into print-trailing-context mode instead. */ + make_trailing_context_printer(btn); + + return SVN_NO_ERROR; +} + + +static svn_error_t * +output_conflict(void *baton, + apr_off_t original_start, apr_off_t original_length, + apr_off_t modified_start, apr_off_t modified_length, + apr_off_t latest_start, apr_off_t latest_length, + svn_diff_t *diff) +{ + svn_diff3__file_output_baton_t *file_baton = baton; + + svn_diff_conflict_display_style_t style = file_baton->conflict_style; + + if (style == svn_diff_conflict_display_only_conflicts) + return output_conflict_with_context(file_baton, + original_start, original_length, + modified_start, modified_length, + latest_start, latest_length); + + if (style == svn_diff_conflict_display_resolved_modified_latest) + { + if (diff) + return svn_diff_output(diff, baton, + &svn_diff3__file_output_vtable); + else + style = svn_diff_conflict_display_modified_latest; + } + + if (style == svn_diff_conflict_display_modified_latest || + style == svn_diff_conflict_display_modified_original_latest) + { + SVN_ERR(svn_stream_puts(file_baton->output_stream, + file_baton->conflict_modified)); + SVN_ERR(output_marker_eol(file_baton)); + + SVN_ERR(output_hunk(baton, 1, modified_start, modified_length)); + + if (style == svn_diff_conflict_display_modified_original_latest) + { + SVN_ERR(svn_stream_puts(file_baton->output_stream, + file_baton->conflict_original)); + SVN_ERR(output_marker_eol(file_baton)); + SVN_ERR(output_hunk(baton, 0, original_start, original_length)); + } + + SVN_ERR(svn_stream_puts(file_baton->output_stream, + file_baton->conflict_separator)); + SVN_ERR(output_marker_eol(file_baton)); + + SVN_ERR(output_hunk(baton, 2, latest_start, latest_length)); + + SVN_ERR(svn_stream_puts(file_baton->output_stream, + file_baton->conflict_latest)); + 
SVN_ERR(output_marker_eol(file_baton)); + } + else if (style == svn_diff_conflict_display_modified) + SVN_ERR(output_hunk(baton, 1, modified_start, modified_length)); + else if (style == svn_diff_conflict_display_latest) + SVN_ERR(output_hunk(baton, 2, latest_start, latest_length)); + else /* unknown style */ + SVN_ERR_MALFUNCTION(); + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_diff_file_output_merge2(svn_stream_t *output_stream, + svn_diff_t *diff, + const char *original_path, + const char *modified_path, + const char *latest_path, + const char *conflict_original, + const char *conflict_modified, + const char *conflict_latest, + const char *conflict_separator, + svn_diff_conflict_display_style_t style, + apr_pool_t *pool) +{ + svn_diff3__file_output_baton_t baton; + apr_file_t *file[3]; + int idx; +#if APR_HAS_MMAP + apr_mmap_t *mm[3] = { 0 }; +#endif /* APR_HAS_MMAP */ + const char *eol; + svn_boolean_t conflicts_only = + (style == svn_diff_conflict_display_only_conflicts); + + memset(&baton, 0, sizeof(baton)); + if (conflicts_only) + { + baton.pool = svn_pool_create(pool); + make_context_saver(&baton); + baton.real_output_stream = output_stream; + } + else + baton.output_stream = output_stream; + baton.path[0] = original_path; + baton.path[1] = modified_path; + baton.path[2] = latest_path; + SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified, + conflict_modified ? conflict_modified + : apr_psprintf(pool, "<<<<<<< %s", + modified_path), + pool)); + SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original, + conflict_original ? conflict_original + : apr_psprintf(pool, "||||||| %s", + original_path), + pool)); + SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator, + conflict_separator ? conflict_separator + : "=======", pool)); + SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest, + conflict_latest ? 
conflict_latest + : apr_psprintf(pool, ">>>>>>> %s", + latest_path), + pool)); + + baton.conflict_style = style; + + for (idx = 0; idx < 3; idx++) + { + apr_off_t size; + + SVN_ERR(map_or_read_file(&file[idx], + MMAP_T_ARG(mm[idx]) + &baton.buffer[idx], &size, + baton.path[idx], pool)); + + baton.curp[idx] = baton.buffer[idx]; + baton.endp[idx] = baton.buffer[idx]; + + if (baton.endp[idx]) + baton.endp[idx] += size; + } + + /* Check what eol marker we should use for conflict markers. + We use the eol marker of the modified file and fall back on the + platform's eol marker if that file doesn't contain any newlines. */ + eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1], + NULL); + if (! eol) + eol = APR_EOL_STR; + baton.marker_eol = eol; + + SVN_ERR(svn_diff_output(diff, &baton, + &svn_diff3__file_output_vtable)); + + for (idx = 0; idx < 3; idx++) + { +#if APR_HAS_MMAP + if (mm[idx]) + { + apr_status_t rv = apr_mmap_delete(mm[idx]); + if (rv != APR_SUCCESS) + { + return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"), + baton.path[idx]); + } + } +#endif /* APR_HAS_MMAP */ + + if (file[idx]) + { + SVN_ERR(svn_io_file_close(file[idx], pool)); + } + } + + if (conflicts_only) + svn_pool_destroy(baton.pool); + + return SVN_NO_ERROR; +} + diff --git a/subversion/libsvn_diff/diff_memory.c b/subversion/libsvn_diff/diff_memory.c new file mode 100644 index 0000000..00f4c7f --- /dev/null +++ b/subversion/libsvn_diff/diff_memory.c @@ -0,0 +1,1161 @@ +/* + * diff_memory.c : routines for doing diffs on in-memory data + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#define WANT_MEMFUNC +#define WANT_STRFUNC +#include <apr.h> +#include <apr_want.h> +#include <apr_tables.h> + +#include <assert.h> + +#include "svn_diff.h" +#include "svn_pools.h" +#include "svn_types.h" +#include "svn_string.h" +#include "svn_utf.h" +#include "diff.h" +#include "svn_private_config.h" +#include "private/svn_adler32.h" +#include "private/svn_diff_private.h" + +typedef struct source_tokens_t +{ + /* A token simply is an svn_string_t pointing to + the data of the in-memory data source, containing + the raw token text, with length stored in the string */ + apr_array_header_t *tokens; + + /* Next token to be consumed */ + apr_size_t next_token; + + /* The source, containing the in-memory data to be diffed */ + const svn_string_t *source; + + /* The last token ends with a newline character (sequence) */ + svn_boolean_t ends_without_eol; +} source_tokens_t; + +typedef struct diff_mem_baton_t +{ + /* The tokens for each of the sources */ + source_tokens_t sources[4]; + + /* Normalization buffer; we only ever compare 2 tokens at the same time */ + char *normalization_buf[2]; + + /* Options for normalized comparison of the data sources */ + const svn_diff_file_options_t *normalization_options; +} diff_mem_baton_t; + + +static int +datasource_to_index(svn_diff_datasource_e datasource) +{ + switch 
(datasource) + { + case svn_diff_datasource_original: + return 0; + + case svn_diff_datasource_modified: + return 1; + + case svn_diff_datasource_latest: + return 2; + + case svn_diff_datasource_ancestor: + return 3; + } + + return -1; +} + + +/* Implements svn_diff_fns2_t::datasources_open */ +static svn_error_t * +datasources_open(void *baton, + apr_off_t *prefix_lines, + apr_off_t *suffix_lines, + const svn_diff_datasource_e *datasources, + apr_size_t datasources_len) +{ + /* Do nothing: everything is already there and initialized to 0 */ + *prefix_lines = 0; + *suffix_lines = 0; + return SVN_NO_ERROR; +} + + +/* Implements svn_diff_fns2_t::datasource_close */ +static svn_error_t * +datasource_close(void *baton, svn_diff_datasource_e datasource) +{ + /* Do nothing. The compare_token function needs previous datasources + * to stay available until all datasources are processed. + */ + + return SVN_NO_ERROR; +} + + +/* Implements svn_diff_fns2_t::datasource_get_next_token */ +static svn_error_t * +datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton, + svn_diff_datasource_e datasource) +{ + diff_mem_baton_t *mem_baton = baton; + source_tokens_t *src = &(mem_baton->sources[datasource_to_index(datasource)]); + + if ((apr_size_t)src->tokens->nelts > src->next_token) + { + /* There are actually tokens to be returned */ + char *buf = mem_baton->normalization_buf[0]; + svn_string_t *tok = (*token) + = APR_ARRAY_IDX(src->tokens, src->next_token, svn_string_t *); + apr_off_t len = tok->len; + svn_diff__normalize_state_t state + = svn_diff__normalize_state_normal; + + svn_diff__normalize_buffer(&buf, &len, &state, tok->data, + mem_baton->normalization_options); + *hash = svn__adler32(0, buf, len); + src->next_token++; + } + else + *token = NULL; + + return SVN_NO_ERROR; +} + +/* Implements svn_diff_fns2_t::token_compare */ +static svn_error_t * +token_compare(void *baton, void *token1, void *token2, int *result) +{ + /* Implement the same behaviour as 
diff_file.c:token_compare(), + but be simpler, because we know we'll have all data in memory */ + diff_mem_baton_t *btn = baton; + svn_string_t *t1 = token1; + svn_string_t *t2 = token2; + char *buf1 = btn->normalization_buf[0]; + char *buf2 = btn->normalization_buf[1]; + apr_off_t len1 = t1->len; + apr_off_t len2 = t2->len; + svn_diff__normalize_state_t state = svn_diff__normalize_state_normal; + + svn_diff__normalize_buffer(&buf1, &len1, &state, t1->data, + btn->normalization_options); + state = svn_diff__normalize_state_normal; + svn_diff__normalize_buffer(&buf2, &len2, &state, t2->data, + btn->normalization_options); + + if (len1 != len2) + *result = (len1 < len2) ? -1 : 1; + else + *result = (len1 == 0) ? 0 : memcmp(buf1, buf2, (size_t) len1); + + return SVN_NO_ERROR; +} + +/* Implements svn_diff_fns2_t::token_discard */ +static void +token_discard(void *baton, void *token) +{ + /* No-op, we have no use for discarded tokens... */ +} + + +/* Implements svn_diff_fns2_t::token_discard_all */ +static void +token_discard_all(void *baton) +{ + /* Do nothing. + Note that in the file case, this function discards all + tokens allocated, but we're geared toward small in-memory diffs. + Meaning that there's no special pool to clear. + */ +} + + +static const svn_diff_fns2_t svn_diff__mem_vtable = +{ + datasources_open, + datasource_close, + datasource_get_next_token, + token_compare, + token_discard, + token_discard_all +}; + +/* Fill SRC with the diff tokens (e.g. lines). + + TEXT is assumed to live long enough for the tokens to + stay valid during their lifetime: no data is copied, + instead, svn_string_t's are allocated pointing straight + into TEXT. 
+*/ +static void +fill_source_tokens(source_tokens_t *src, + const svn_string_t *text, + apr_pool_t *pool) +{ + const char *curp; + const char *endp; + const char *startp; + + src->tokens = apr_array_make(pool, 0, sizeof(svn_string_t *)); + src->next_token = 0; + src->source = text; + + for (startp = curp = text->data, endp = curp + text->len; + curp != endp; curp++) + { + if (curp != endp && *curp == '\r' && *(curp + 1) == '\n') + curp++; + + if (*curp == '\r' || *curp == '\n') + { + APR_ARRAY_PUSH(src->tokens, svn_string_t *) = + svn_string_ncreate(startp, curp - startp + 1, pool); + + startp = curp + 1; + } + } + + /* If there's anything remaining (ie last line doesn't have a newline) */ + if (startp != endp) + { + APR_ARRAY_PUSH(src->tokens, svn_string_t *) = + svn_string_ncreate(startp, endp - startp, pool); + src->ends_without_eol = TRUE; + } + else + src->ends_without_eol = FALSE; +} + + +static void +alloc_normalization_bufs(diff_mem_baton_t *btn, + int sources, + apr_pool_t *pool) +{ + apr_size_t max_len = 0; + apr_off_t idx; + int i; + + for (i = 0; i < sources; i++) + { + apr_array_header_t *tokens = btn->sources[i].tokens; + if (tokens->nelts > 0) + for (idx = 0; idx < tokens->nelts; idx++) + { + apr_size_t token_len + = APR_ARRAY_IDX(tokens, idx, svn_string_t *)->len; + max_len = (max_len < token_len) ? 
token_len : max_len; + } + } + + btn->normalization_buf[0] = apr_palloc(pool, max_len); + btn->normalization_buf[1] = apr_palloc(pool, max_len); +} + +svn_error_t * +svn_diff_mem_string_diff(svn_diff_t **diff, + const svn_string_t *original, + const svn_string_t *modified, + const svn_diff_file_options_t *options, + apr_pool_t *pool) +{ + diff_mem_baton_t baton; + + fill_source_tokens(&(baton.sources[0]), original, pool); + fill_source_tokens(&(baton.sources[1]), modified, pool); + alloc_normalization_bufs(&baton, 2, pool); + + baton.normalization_options = options; + + return svn_diff_diff_2(diff, &baton, &svn_diff__mem_vtable, pool); +} + +svn_error_t * +svn_diff_mem_string_diff3(svn_diff_t **diff, + const svn_string_t *original, + const svn_string_t *modified, + const svn_string_t *latest, + const svn_diff_file_options_t *options, + apr_pool_t *pool) +{ + diff_mem_baton_t baton; + + fill_source_tokens(&(baton.sources[0]), original, pool); + fill_source_tokens(&(baton.sources[1]), modified, pool); + fill_source_tokens(&(baton.sources[2]), latest, pool); + alloc_normalization_bufs(&baton, 3, pool); + + baton.normalization_options = options; + + return svn_diff_diff3_2(diff, &baton, &svn_diff__mem_vtable, pool); +} + + +svn_error_t * +svn_diff_mem_string_diff4(svn_diff_t **diff, + const svn_string_t *original, + const svn_string_t *modified, + const svn_string_t *latest, + const svn_string_t *ancestor, + const svn_diff_file_options_t *options, + apr_pool_t *pool) +{ + diff_mem_baton_t baton; + + fill_source_tokens(&(baton.sources[0]), original, pool); + fill_source_tokens(&(baton.sources[1]), modified, pool); + fill_source_tokens(&(baton.sources[2]), latest, pool); + fill_source_tokens(&(baton.sources[3]), ancestor, pool); + alloc_normalization_bufs(&baton, 4, pool); + + baton.normalization_options = options; + + return svn_diff_diff4_2(diff, &baton, &svn_diff__mem_vtable, pool); +} + + +typedef enum unified_output_e +{ + unified_output_context = 0, + 
unified_output_delete, + unified_output_insert, + unified_output_skip +} unified_output_e; + +/* Baton for generating unified diffs */ +typedef struct unified_output_baton_t +{ + svn_stream_t *output_stream; + const char *header_encoding; + source_tokens_t sources[2]; /* 0 == original; 1 == modified */ + apr_off_t current_token[2]; /* current token per source */ + + /* Cached markers, in header_encoding, + indexed using unified_output_e */ + const char *prefix_str[3]; + + svn_stringbuf_t *hunk; /* in-progress hunk data */ + apr_off_t hunk_length[2]; /* 0 == original; 1 == modified */ + apr_off_t hunk_start[2]; /* 0 == original; 1 == modified */ + + /* The delimiters of the hunk header, '@@' for text hunks and '##' for + * property hunks. */ + const char *hunk_delimiter; + /* The string to print after a line that does not end with a newline. + * It must start with a '\'. Typically "\ No newline at end of file". */ + const char *no_newline_string; + + /* Pool for allocation of temporary memory in the callbacks + Should be cleared on entry of each iteration of a callback */ + apr_pool_t *pool; +} output_baton_t; + + +/* Append tokens (lines) FIRST up to PAST_LAST + from token-source index TOKENS with change-type TYPE + to the current hunk. 
+*/ +static svn_error_t * +output_unified_token_range(output_baton_t *btn, + int tokens, + unified_output_e type, + apr_off_t until) +{ + source_tokens_t *source = &btn->sources[tokens]; + + if (until > source->tokens->nelts) + until = source->tokens->nelts; + + if (until <= btn->current_token[tokens]) + return SVN_NO_ERROR; + + /* Do the loop with prefix and token */ + while (TRUE) + { + svn_string_t *token = + APR_ARRAY_IDX(source->tokens, btn->current_token[tokens], + svn_string_t *); + + if (type != unified_output_skip) + { + svn_stringbuf_appendcstr(btn->hunk, btn->prefix_str[type]); + svn_stringbuf_appendbytes(btn->hunk, token->data, token->len); + } + + if (type == unified_output_context) + { + btn->hunk_length[0]++; + btn->hunk_length[1]++; + } + else if (type == unified_output_delete) + btn->hunk_length[0]++; + else if (type == unified_output_insert) + btn->hunk_length[1]++; + + /* ### TODO: Add skip processing for -p handling? */ + + btn->current_token[tokens]++; + if (btn->current_token[tokens] == until) + break; + } + + if (btn->current_token[tokens] == source->tokens->nelts + && source->ends_without_eol) + { + const char *out_str; + + SVN_ERR(svn_utf_cstring_from_utf8_ex2( + &out_str, btn->no_newline_string, + btn->header_encoding, btn->pool)); + svn_stringbuf_appendcstr(btn->hunk, out_str); + } + + + + return SVN_NO_ERROR; +} + +/* Flush the hunk currently built up in BATON + into the BATON's output_stream. + Use the specified HUNK_DELIMITER. + If HUNK_DELIMITER is NULL, fall back to the default delimiter. 
*/ +static svn_error_t * +output_unified_flush_hunk(output_baton_t *baton, + const char *hunk_delimiter) +{ + apr_off_t target_token; + apr_size_t hunk_len; + apr_off_t old_start; + apr_off_t new_start; + + if (svn_stringbuf_isempty(baton->hunk)) + return SVN_NO_ERROR; + + svn_pool_clear(baton->pool); + + /* Write the trailing context */ + target_token = baton->hunk_start[0] + baton->hunk_length[0] + + SVN_DIFF__UNIFIED_CONTEXT_SIZE; + SVN_ERR(output_unified_token_range(baton, 0 /*original*/, + unified_output_context, + target_token)); + if (hunk_delimiter == NULL) + hunk_delimiter = "@@"; + + old_start = baton->hunk_start[0]; + new_start = baton->hunk_start[1]; + + /* If the file is non-empty, convert the line indexes from + zero based to one based */ + if (baton->hunk_length[0]) + old_start++; + if (baton->hunk_length[1]) + new_start++; + + /* Write the hunk header */ + SVN_ERR(svn_diff__unified_write_hunk_header( + baton->output_stream, baton->header_encoding, hunk_delimiter, + old_start, baton->hunk_length[0], + new_start, baton->hunk_length[1], + NULL /* hunk_extra_context */, + baton->pool)); + + hunk_len = baton->hunk->len; + SVN_ERR(svn_stream_write(baton->output_stream, + baton->hunk->data, &hunk_len)); + + /* Prepare for the next hunk */ + baton->hunk_length[0] = 0; + baton->hunk_length[1] = 0; + baton->hunk_start[0] = 0; + baton->hunk_start[1] = 0; + svn_stringbuf_setempty(baton->hunk); + + return SVN_NO_ERROR; +} + +/* Implements svn_diff_output_fns_t::output_diff_modified */ +static svn_error_t * +output_unified_diff_modified(void *baton, + apr_off_t original_start, + apr_off_t original_length, + apr_off_t modified_start, + apr_off_t modified_length, + apr_off_t latest_start, + apr_off_t latest_length) +{ + output_baton_t *output_baton = baton; + apr_off_t context_prefix_length; + apr_off_t prev_context_end; + svn_boolean_t init_hunk = FALSE; + + if (original_start > SVN_DIFF__UNIFIED_CONTEXT_SIZE) + context_prefix_length = 
SVN_DIFF__UNIFIED_CONTEXT_SIZE; + else + context_prefix_length = original_start; + + /* Calculate where the previous hunk will end if we would write it now + (including the necessary context at the end) */ + if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0) + { + prev_context_end = output_baton->hunk_start[0] + + output_baton->hunk_length[0] + + SVN_DIFF__UNIFIED_CONTEXT_SIZE; + } + else + { + prev_context_end = -1; + + if (output_baton->hunk_start[0] == 0 + && (original_length > 0 || modified_length > 0)) + init_hunk = TRUE; + } + + /* If the changed range is far enough from the previous range, flush the current + hunk. */ + { + apr_off_t new_hunk_start = (original_start - context_prefix_length); + + if (output_baton->current_token[0] < new_hunk_start + && prev_context_end <= new_hunk_start) + { + SVN_ERR(output_unified_flush_hunk(output_baton, + output_baton->hunk_delimiter)); + init_hunk = TRUE; + } + else if (output_baton->hunk_length[0] > 0 + || output_baton->hunk_length[1] > 0) + { + /* We extend the current hunk */ + + /* Original: Output the context preceding the changed range */ + SVN_ERR(output_unified_token_range(output_baton, 0 /* original */, + unified_output_context, + original_start)); + } + } + + /* Original: Skip lines until we are at the beginning of the context we want + to display */ + SVN_ERR(output_unified_token_range(output_baton, 0 /* original */, + unified_output_skip, + original_start - context_prefix_length)); + + if (init_hunk) + { + SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0 + && output_baton->hunk_length[1] == 0); + + output_baton->hunk_start[0] = original_start - context_prefix_length; + output_baton->hunk_start[1] = modified_start - context_prefix_length; + } + + /* Modified: Skip lines until we are at the start of the changed range */ + SVN_ERR(output_unified_token_range(output_baton, 1 /* modified */, + unified_output_skip, + modified_start)); + + /* Original: Output the context preceding the changed 
range */ + SVN_ERR(output_unified_token_range(output_baton, 0 /* original */, + unified_output_context, + original_start)); + + /* Both: Output the changed range */ + SVN_ERR(output_unified_token_range(output_baton, 0 /* original */, + unified_output_delete, + original_start + original_length)); + SVN_ERR(output_unified_token_range(output_baton, 1 /* modified */, + unified_output_insert, + modified_start + modified_length)); + + return SVN_NO_ERROR; +} + +static const svn_diff_output_fns_t mem_output_unified_vtable = +{ + NULL, /* output_common */ + output_unified_diff_modified, + NULL, /* output_diff_latest */ + NULL, /* output_diff_common */ + NULL /* output_conflict */ +}; + + +svn_error_t * +svn_diff_mem_string_output_unified2(svn_stream_t *output_stream, + svn_diff_t *diff, + svn_boolean_t with_diff_header, + const char *hunk_delimiter, + const char *original_header, + const char *modified_header, + const char *header_encoding, + const svn_string_t *original, + const svn_string_t *modified, + apr_pool_t *pool) +{ + + if (svn_diff_contains_diffs(diff)) + { + output_baton_t baton; + + memset(&baton, 0, sizeof(baton)); + baton.output_stream = output_stream; + baton.pool = svn_pool_create(pool); + baton.header_encoding = header_encoding; + baton.hunk = svn_stringbuf_create_empty(pool); + baton.hunk_delimiter = hunk_delimiter; + baton.no_newline_string + = (hunk_delimiter == NULL || strcmp(hunk_delimiter, "##") != 0) + ? 
                APR_EOL_STR SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR
              : APR_EOL_STR SVN_DIFF__NO_NEWLINE_AT_END_OF_PROPERTY APR_EOL_STR;

      /* Prepare the per-line prefixes (" ", "-", "+") in the caller's
         requested header encoding. */
      SVN_ERR(svn_utf_cstring_from_utf8_ex2
              (&(baton.prefix_str[unified_output_context]), " ",
               header_encoding, pool));
      SVN_ERR(svn_utf_cstring_from_utf8_ex2
              (&(baton.prefix_str[unified_output_delete]), "-",
               header_encoding, pool));
      SVN_ERR(svn_utf_cstring_from_utf8_ex2
              (&(baton.prefix_str[unified_output_insert]), "+",
               header_encoding, pool));

      /* Tokenize both sides into per-line tokens. */
      fill_source_tokens(&baton.sources[0], original, pool);
      fill_source_tokens(&baton.sources[1], modified, pool);

      if (with_diff_header)
        {
          SVN_ERR(svn_diff__unidiff_write_header(
                    output_stream, header_encoding,
                    original_header, modified_header, pool));
        }

      SVN_ERR(svn_diff_output(diff, &baton,
                              &mem_output_unified_vtable));

      /* Write out any final, still-buffered hunk. */
      SVN_ERR(output_unified_flush_hunk(&baton, hunk_delimiter));

      svn_pool_destroy(baton.pool);
    }

  return SVN_NO_ERROR;
}

/* Compatibility wrapper around svn_diff_mem_string_output_unified2():
   always emits the diff header and uses the default hunk delimiter. */
svn_error_t *
svn_diff_mem_string_output_unified(svn_stream_t *output_stream,
                                   svn_diff_t *diff,
                                   const char *original_header,
                                   const char *modified_header,
                                   const char *header_encoding,
                                   const svn_string_t *original,
                                   const svn_string_t *modified,
                                   apr_pool_t *pool)
{
  SVN_ERR(svn_diff_mem_string_output_unified2(output_stream,
                                              diff,
                                              TRUE,
                                              NULL,
                                              original_header,
                                              modified_header,
                                              header_encoding,
                                              original,
                                              modified,
                                              pool));
  return SVN_NO_ERROR;
}



/* diff3 merge output */

/* A stream to remember *leading* context.  Note that this stream does
   *not* copy the data that it is remembering; it just saves
   *pointers!
 */
typedef struct context_saver_t {
  svn_stream_t *stream;
  /* Ring buffer of pointers to (and lengths of) the most recently
     written lines. */
  const char *data[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
  apr_size_t len[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
  apr_size_t next_slot;
  apr_size_t total_written;
} context_saver_t;


/* Stream write handler: remember a pointer to DATA (not a copy!) in
   the ring buffer, overwriting the oldest saved line when full. */
static svn_error_t *
context_saver_stream_write(void *baton,
                           const char *data,
                           apr_size_t *len)
{
  context_saver_t *cs = baton;
  cs->data[cs->next_slot] = data;
  cs->len[cs->next_slot] = *len;
  cs->next_slot = (cs->next_slot + 1) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
  cs->total_written++;
  return SVN_NO_ERROR;
}


typedef struct merge_output_baton_t
{
  svn_stream_t *output_stream;

  /* Tokenized source text */
  source_tokens_t sources[3];
  apr_off_t next_token[3];

  /* Markers for marking conflicted sections */
  const char *markers[4]; /* 0 = original, 1 = modified,
                             2 = separator, 3 = latest (end) */
  const char *marker_eol;

  svn_diff_conflict_display_style_t conflict_style;

  /* The rest of the fields are for
     svn_diff_conflict_display_only_conflicts only.  Note that for
     these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
     (soon after a conflict) a "trailing context stream", never the
     actual output stream.*/
  /* The actual output stream. */
  svn_stream_t *real_output_stream;
  context_saver_t *context_saver;
  /* Used to allocate context_saver and trailing context streams, and
     for some printfs.
   */
  apr_pool_t *pool;
} merge_output_baton_t;


/* Write the lines saved in CS to OUTPUT_STREAM, oldest first (the
   ring buffer wraps at CS->next_slot); never-filled slots are
   skipped. */
static svn_error_t *
flush_context_saver(context_saver_t *cs,
                    svn_stream_t *output_stream)
{
  int i;
  for (i = 0; i < SVN_DIFF__UNIFIED_CONTEXT_SIZE; i++)
    {
      apr_size_t slot = (i + cs->next_slot) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
      if (cs->data[slot])
        {
          apr_size_t len = cs->len[slot];
          SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
        }
    }
  return SVN_NO_ERROR;
}


/* Clear MOB->pool and install a fresh context saver as MOB's current
   output stream, so subsequent writes are remembered as leading
   context.  NOTE(review): clearing the pool invalidates any earlier
   context_saver or trailing-context stream allocated from it --
   callers appear to rely on never touching those again; confirm. */
static void
make_context_saver(merge_output_baton_t *mob)
{
  context_saver_t *cs;

  svn_pool_clear(mob->pool);
  cs = apr_pcalloc(mob->pool, sizeof(*cs));
  cs->stream = svn_stream_empty(mob->pool);
  svn_stream_set_baton(cs->stream, cs);
  svn_stream_set_write(cs->stream, context_saver_stream_write);
  mob->context_saver = cs;
  mob->output_stream = cs->stream;
}


/* A stream which prints SVN_DIFF__UNIFIED_CONTEXT_SIZE lines to
   BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
   a context_saver; used for *trailing* context.
 */

struct trailing_context_printer {
  apr_size_t lines_to_print;   /* countdown of context lines still owed */
  merge_output_baton_t *mob;
};


/* Stream write handler: pass DATA through to the real output stream;
   once LINES_TO_PRINT lines have been printed, flip MOB back into
   leading-context-saving mode. */
static svn_error_t *
trailing_context_printer_write(void *baton,
                               const char *data,
                               apr_size_t *len)
{
  struct trailing_context_printer *tcp = baton;
  SVN_ERR_ASSERT(tcp->lines_to_print > 0);
  SVN_ERR(svn_stream_write(tcp->mob->real_output_stream, data, len));
  tcp->lines_to_print--;
  if (tcp->lines_to_print == 0)
    make_context_saver(tcp->mob);
  return SVN_NO_ERROR;
}


/* Install a trailing-context printer as BTN's current output stream.
   NOTE(review): clears BTN->pool first, like make_context_saver(). */
static void
make_trailing_context_printer(merge_output_baton_t *btn)
{
  struct trailing_context_printer *tcp;
  svn_stream_t *s;

  svn_pool_clear(btn->pool);

  tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
  tcp->lines_to_print = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
  tcp->mob = btn;
  s = svn_stream_empty(btn->pool);
  svn_stream_set_baton(s, tcp);
  svn_stream_set_write(s, trailing_context_printer_write);
  btn->output_stream = s;
}


/* Write LENGTH tokens (lines), starting at token FIRST of source IDX
   (0 = original, 1 = modified, 2 = latest), to BTN's current output
   stream.  If LINES_PRINTED_P is non-NULL, set it to the number of
   lines actually printed (may be less than LENGTH at end of file). */
static svn_error_t *
output_merge_token_range(apr_size_t *lines_printed_p,
                         merge_output_baton_t *btn,
                         int idx, apr_off_t first,
                         apr_off_t length)
{
  apr_array_header_t *tokens = btn->sources[idx].tokens;
  apr_size_t lines_printed = 0;

  for (; length > 0 && first < tokens->nelts; length--, first++)
    {
      svn_string_t *token = APR_ARRAY_IDX(tokens, first, svn_string_t *);
      apr_size_t len = token->len;

      /* Note that the trailing context printer assumes that
         svn_stream_write is called exactly once per line.
       */
      SVN_ERR(svn_stream_write(btn->output_stream, token->data, &len));
      lines_printed++;
    }

  if (lines_printed_p)
    *lines_printed_p = lines_printed;

  return SVN_NO_ERROR;
}

/* Write the marker end-of-line sequence to BTN's output stream. */
static svn_error_t *
output_marker_eol(merge_output_baton_t *btn)
{
  return svn_stream_puts(btn->output_stream, btn->marker_eol);
}

/* Write conflict marker IDX (see merge_output_baton_t.markers),
   followed by an EOL. */
static svn_error_t *
output_merge_marker(merge_output_baton_t *btn, int idx)
{
  SVN_ERR(svn_stream_puts(btn->output_stream, btn->markers[idx]));
  return output_marker_eol(btn);
}

/* Vtable entry for common/modified/diff-common ranges: emit the
   "modified" side of the range. */
static svn_error_t *
output_common_modified(void *baton,
                       apr_off_t original_start, apr_off_t original_length,
                       apr_off_t modified_start, apr_off_t modified_length,
                       apr_off_t latest_start, apr_off_t latest_length)
{
  return output_merge_token_range(NULL, baton, 1/*modified*/,
                                  modified_start, modified_length);
}

/* Vtable entry for latest-only ranges. */
static svn_error_t *
output_latest(void *baton,
              apr_off_t original_start, apr_off_t original_length,
              apr_off_t modified_start, apr_off_t modified_length,
              apr_off_t latest_start, apr_off_t latest_length)
{
  return output_merge_token_range(NULL, baton, 2/*latest*/,
                                  latest_start, latest_length);
}

static svn_error_t *
output_conflict(void *baton,
                apr_off_t original_start, apr_off_t original_length,
                apr_off_t modified_start, apr_off_t modified_length,
                apr_off_t latest_start, apr_off_t latest_length,
                svn_diff_t *diff);

static const svn_diff_output_fns_t merge_output_vtable =
{
  output_common_modified, /* common */
  output_common_modified, /* modified */
  output_latest,
  output_common_modified, /* output_diff_common */
  output_conflict
};

/* Emit a conflicted range according to BTN->conflict_style; for the
   "resolved" style, recursively emit DIFF's resolution when one is
   available. */
static svn_error_t *
output_conflict(void *baton,
                apr_off_t original_start, apr_off_t original_length,
                apr_off_t modified_start, apr_off_t modified_length,
                apr_off_t latest_start, apr_off_t latest_length,
                svn_diff_t *diff)
{
  merge_output_baton_t *btn = baton;

  svn_diff_conflict_display_style_t style = btn->conflict_style;

  if (style == svn_diff_conflict_display_resolved_modified_latest)
    {
      if (diff)
        return svn_diff_output(diff, baton, &merge_output_vtable);
      else
        style = svn_diff_conflict_display_modified_latest;
    }

  if (style == svn_diff_conflict_display_modified_latest ||
      style == svn_diff_conflict_display_modified_original_latest)
    {
      SVN_ERR(output_merge_marker(btn, 1/*modified*/));
      SVN_ERR(output_merge_token_range(NULL, btn, 1/*modified*/,
                                       modified_start, modified_length));

      if (style == svn_diff_conflict_display_modified_original_latest)
        {
          SVN_ERR(output_merge_marker(btn, 0/*original*/));
          SVN_ERR(output_merge_token_range(NULL, btn, 0/*original*/,
                                           original_start, original_length));
        }

      SVN_ERR(output_merge_marker(btn, 2/*separator*/));
      SVN_ERR(output_merge_token_range(NULL, btn, 2/*latest*/,
                                       latest_start, latest_length));
      SVN_ERR(output_merge_marker(btn, 3/*latest (end)*/));
    }
  else if (style == svn_diff_conflict_display_modified)
    SVN_ERR(output_merge_token_range(NULL, btn, 1/*modified*/,
                                     modified_start, modified_length));
  else if (style == svn_diff_conflict_display_latest)
    SVN_ERR(output_merge_token_range(NULL, btn, 2/*latest*/,
                                     latest_start, latest_length));
  else /* unknown style */
    SVN_ERR_MALFUNCTION();

  return SVN_NO_ERROR;
}


/* Emit a conflict with line-numbered markers, flushing any saved
   leading context first and arranging for trailing context to be
   printed afterwards (svn_diff_conflict_display_only_conflicts). */
static svn_error_t *
output_conflict_with_context(void *baton,
                             apr_off_t original_start,
                             apr_off_t original_length,
                             apr_off_t modified_start,
                             apr_off_t modified_length,
                             apr_off_t latest_start,
                             apr_off_t latest_length,
                             svn_diff_t *diff)
{
  merge_output_baton_t *btn = baton;

  /* Are we currently saving starting context (as opposed to printing
     trailing context)?  If so, flush it.
   */
  if (btn->output_stream == btn->context_saver->stream)
    {
      if (btn->context_saver->total_written > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
        SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
      SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
    }

  /* Print to the real output stream. */
  btn->output_stream = btn->real_output_stream;

  /* Output the conflict itself. */
  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
                            (modified_length == 1
                             ? "%s (%" APR_OFF_T_FMT ")"
                             : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
                            btn->markers[1],
                            modified_start + 1, modified_length));
  SVN_ERR(output_marker_eol(btn));
  SVN_ERR(output_merge_token_range(NULL, btn, 1/*modified*/,
                                   modified_start, modified_length));

  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
                            (original_length == 1
                             ? "%s (%" APR_OFF_T_FMT ")"
                             : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
                            btn->markers[0],
                            original_start + 1, original_length));
  SVN_ERR(output_marker_eol(btn));
  SVN_ERR(output_merge_token_range(NULL, btn, 0/*original*/,
                                   original_start, original_length));

  SVN_ERR(output_merge_marker(btn, 2/*separator*/));
  SVN_ERR(output_merge_token_range(NULL, btn, 2/*latest*/,
                                   latest_start, latest_length));
  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
                            (latest_length == 1
                             ? "%s (%" APR_OFF_T_FMT ")"
                             : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
                            btn->markers[3],
                            latest_start + 1, latest_length));
  SVN_ERR(output_marker_eol(btn));

  /* Go into print-trailing-context mode instead. */
  make_trailing_context_printer(btn);

  return SVN_NO_ERROR;
}


static const svn_diff_output_fns_t merge_only_conflicts_output_vtable =
{
  output_common_modified,
  output_common_modified,
  output_latest,
  output_common_modified,
  output_conflict_with_context
};


/* TOKEN is the first token in the modified file.
   Return its line-ending, if any.
 */
static const char *
detect_eol(svn_string_t *token)
{
  const char *curp;

  if (token->len == 0)
    return NULL;

  /* Inspect only the last byte(s) of the token (one line). */
  curp = token->data + token->len - 1;
  if (*curp == '\r')
    return "\r";
  else if (*curp != '\n')
    return NULL;
  else
    {
      if (token->len == 1
          || *(--curp) != '\r')
        return "\n";
      else
        return "\r\n";
    }
}

svn_error_t *
svn_diff_mem_string_output_merge2(svn_stream_t *output_stream,
                                  svn_diff_t *diff,
                                  const svn_string_t *original,
                                  const svn_string_t *modified,
                                  const svn_string_t *latest,
                                  const char *conflict_original,
                                  const char *conflict_modified,
                                  const char *conflict_latest,
                                  const char *conflict_separator,
                                  svn_diff_conflict_display_style_t style,
                                  apr_pool_t *pool)
{
  merge_output_baton_t btn;
  const char *eol;
  svn_boolean_t conflicts_only =
    (style == svn_diff_conflict_display_only_conflicts);
  const svn_diff_output_fns_t *vtable = conflicts_only
     ? &merge_only_conflicts_output_vtable : &merge_output_vtable;

  memset(&btn, 0, sizeof(btn));

  if (conflicts_only)
    {
      /* Conflicts-only mode routes output through context-saving
         streams; the private pool backs those streams. */
      btn.pool = svn_pool_create(pool);
      make_context_saver(&btn);
      btn.real_output_stream = output_stream;
    }
  else
    btn.output_stream = output_stream;

  fill_source_tokens(&(btn.sources[0]), original, pool);
  fill_source_tokens(&(btn.sources[1]), modified, pool);
  fill_source_tokens(&(btn.sources[2]), latest, pool);

  btn.conflict_style = style;

  /* Use the EOL style of the modified file's first line for the
     conflict markers, falling back on the platform default. */
  if (btn.sources[1].tokens->nelts > 0)
    {
      eol = detect_eol(APR_ARRAY_IDX(btn.sources[1].tokens, 0, svn_string_t *));
      if (!eol)
        eol = APR_EOL_STR;  /* use the platform default */
    }
  else
    eol = APR_EOL_STR;  /* use the platform default */

  btn.marker_eol = eol;

  SVN_ERR(svn_utf_cstring_from_utf8(&btn.markers[1],
                                    conflict_modified
                                    ? conflict_modified
                                    : "<<<<<<< (modified)",
                                    pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&btn.markers[0],
                                    conflict_original
                                    ? conflict_original
                                    : "||||||| (original)",
                                    pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&btn.markers[2],
                                    conflict_separator
                                    ? conflict_separator
                                    : "=======",
                                    pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&btn.markers[3],
                                    conflict_latest
                                    ? conflict_latest
                                    : ">>>>>>> (latest)",
                                    pool));

  SVN_ERR(svn_diff_output(diff, &btn, vtable));
  if (conflicts_only)
    svn_pool_destroy(btn.pool);

  return SVN_NO_ERROR;
}

/* Compatibility wrapper: map the two legacy boolean flags onto a
   svn_diff_conflict_display_style_t and delegate to output_merge2. */
svn_error_t *
svn_diff_mem_string_output_merge(svn_stream_t *output_stream,
                                 svn_diff_t *diff,
                                 const svn_string_t *original,
                                 const svn_string_t *modified,
                                 const svn_string_t *latest,
                                 const char *conflict_original,
                                 const char *conflict_modified,
                                 const char *conflict_latest,
                                 const char *conflict_separator,
                                 svn_boolean_t display_original_in_conflict,
                                 svn_boolean_t display_resolved_conflicts,
                                 apr_pool_t *pool)
{
  svn_diff_conflict_display_style_t style =
    svn_diff_conflict_display_modified_latest;

  if (display_resolved_conflicts)
    style = svn_diff_conflict_display_resolved_modified_latest;

  /* Note: takes precedence over display_resolved_conflicts. */
  if (display_original_in_conflict)
    style = svn_diff_conflict_display_modified_original_latest;

  return svn_diff_mem_string_output_merge2(output_stream,
                                           diff,
                                           original,
                                           modified,
                                           latest,
                                           conflict_original,
                                           conflict_modified,
                                           conflict_latest,
                                           conflict_separator,
                                           style,
                                           pool);
}
diff --git a/subversion/libsvn_diff/diff_tree.c b/subversion/libsvn_diff/diff_tree.c
new file mode 100644
index 0000000..8490179
--- /dev/null
+++ b/subversion/libsvn_diff/diff_tree.c
@@ -0,0 +1,1705 @@
/*
 * diff_tree.c : default diff tree processor
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.
The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */

#include <apr.h>
#include <apr_pools.h>
#include <apr_general.h>

#include <assert.h>

#include "svn_dirent_uri.h"
#include "svn_error.h"
#include "svn_io.h"
#include "svn_pools.h"
#include "svn_props.h"
#include "svn_types.h"

#include "private/svn_diff_tree.h"
#include "svn_private_config.h"

/* Concrete allocation wrapper behind svn_diff_tree_processor_t. */
typedef struct tree_processor_t
{
  svn_diff_tree_processor_t tp;

  /* void *future_extension */
} tree_processor_t;


/* Default implementation: no per-directory state, never skips. */
static svn_error_t *
default_dir_opened(void **new_dir_baton,
                   svn_boolean_t *skip,
                   svn_boolean_t *skip_children,
                   const char *relpath,
                   const svn_diff_source_t *left_source,
                   const svn_diff_source_t *right_source,
                   const svn_diff_source_t *copyfrom_source,
                   void *parent_dir_baton,
                   const svn_diff_tree_processor_t *processor,
                   apr_pool_t *result_pool,
                   apr_pool_t *scratch_pool)
{
  *new_dir_baton = NULL;
  return SVN_NO_ERROR;
}

/* Default implementation: treat an add as a plain close of the dir. */
static svn_error_t *
default_dir_added(const char *relpath,
                  const svn_diff_source_t *copyfrom_source,
                  const svn_diff_source_t *right_source,
                  /*const*/ apr_hash_t *copyfrom_props,
                  /*const*/ apr_hash_t *right_props,
                  void *dir_baton,
                  const svn_diff_tree_processor_t *processor,
                  apr_pool_t *scratch_pool)
{
  SVN_ERR(processor->dir_closed(relpath, NULL, right_source,
                                dir_baton, processor,
                                scratch_pool));

  return SVN_NO_ERROR;
}

/* Default implementation: forward a delete to dir_closed. */
static svn_error_t *
default_dir_deleted(const char *relpath,
                    const svn_diff_source_t *left_source,
                    /*const*/ apr_hash_t *left_props,
                    void *dir_baton,
                    const svn_diff_tree_processor_t *processor,
                    apr_pool_t *scratch_pool)
{
  SVN_ERR(processor->dir_closed(relpath, left_source, NULL,
                                dir_baton, processor,
                                scratch_pool));
  return SVN_NO_ERROR;
}

/* Default implementation: forward a change to dir_closed. */
static svn_error_t *
default_dir_changed(const char *relpath,
                    const svn_diff_source_t *left_source,
                    const svn_diff_source_t *right_source,
                    /*const*/ apr_hash_t *left_props,
                    /*const*/ apr_hash_t *right_props,
                    const apr_array_header_t *prop_changes,
                    void *dir_baton,
                    const struct svn_diff_tree_processor_t *processor,
                    apr_pool_t *scratch_pool)
{
  SVN_ERR(processor->dir_closed(relpath,
                                left_source, right_source,
                                dir_baton,
                                processor, scratch_pool));
  return SVN_NO_ERROR;
}

/* Default implementation: nothing to do. */
static svn_error_t *
default_dir_closed(const char *relpath,
                   const svn_diff_source_t *left_source,
                   const svn_diff_source_t *right_source,
                   void *dir_baton,
                   const svn_diff_tree_processor_t *processor,
                   apr_pool_t *scratch_pool)
{
  return SVN_NO_ERROR;
}

/* Default implementation: reuse the parent dir baton, never skip. */
static svn_error_t *
default_file_opened(void **new_file_baton,
                    svn_boolean_t *skip,
                    const char *relpath,
                    const svn_diff_source_t *left_source,
                    const svn_diff_source_t *right_source,
                    const svn_diff_source_t *copyfrom_source,
                    void *dir_baton,
                    const svn_diff_tree_processor_t *processor,
                    apr_pool_t *result_pool,
                    apr_pool_t *scratch_pool)
{
  *new_file_baton = dir_baton;
  return SVN_NO_ERROR;
}

/* Default implementation: treat an add as a plain close of the file. */
static svn_error_t *
default_file_added(const char *relpath,
                   const svn_diff_source_t *copyfrom_source,
                   const svn_diff_source_t *right_source,
                   const char *copyfrom_file,
                   const char *right_file,
                   /*const*/ apr_hash_t *copyfrom_props,
                   /*const*/ apr_hash_t *right_props,
                   void *file_baton,
                   const svn_diff_tree_processor_t *processor,
                   apr_pool_t *scratch_pool)
{
  SVN_ERR(processor->file_closed(relpath,
                                 NULL, right_source,
                                 file_baton, processor, scratch_pool));
  return SVN_NO_ERROR;
}

/* Default implementation: forward a delete to file_closed. */
static svn_error_t *
default_file_deleted(const char *relpath,
                     const svn_diff_source_t *left_source,
                     const char *left_file,
                     /*const*/ apr_hash_t *left_props,
                     void *file_baton,
                     const svn_diff_tree_processor_t *processor,
                     apr_pool_t *scratch_pool)
{
  SVN_ERR(processor->file_closed(relpath,
                                 left_source, NULL,
                                 file_baton, processor, scratch_pool));
  return SVN_NO_ERROR;
}

/* Default implementation: forward a change to file_closed. */
static svn_error_t *
default_file_changed(const char *relpath,
                     const svn_diff_source_t *left_source,
                     const svn_diff_source_t *right_source,
                     const char *left_file,
                     const char *right_file,
                     /*const*/ apr_hash_t *left_props,
                     /*const*/ apr_hash_t *right_props,
                     svn_boolean_t file_modified,
                     const apr_array_header_t *prop_changes,
                     void *file_baton,
                     const svn_diff_tree_processor_t *processor,
                     apr_pool_t *scratch_pool)
{
  SVN_ERR(processor->file_closed(relpath,
                                 left_source, right_source,
                                 file_baton, processor, scratch_pool));
  return SVN_NO_ERROR;
}

/* Default implementation: nothing to do. */
static svn_error_t *
default_file_closed(const char *relpath,
                    const svn_diff_source_t *left_source,
                    const svn_diff_source_t *right_source,
                    void *file_baton,
                    const svn_diff_tree_processor_t *processor,
                    apr_pool_t *scratch_pool)
{
  return SVN_NO_ERROR;
}

/* Default implementation: nothing to do. */
static svn_error_t *
default_node_absent(const char *relpath,
                    void *dir_baton,
                    const svn_diff_tree_processor_t *processor,
                    apr_pool_t *scratch_pool)
{
  return SVN_NO_ERROR;
}

svn_diff_tree_processor_t *
svn_diff__tree_processor_create(void *baton,
                                apr_pool_t *result_pool)
{
  tree_processor_t *wrapper;
  wrapper = apr_pcalloc(result_pool, sizeof(*wrapper));

  wrapper->tp.baton = baton;

  wrapper->tp.dir_opened = default_dir_opened;
  wrapper->tp.dir_added = default_dir_added;
  wrapper->tp.dir_deleted = default_dir_deleted;
  wrapper->tp.dir_changed = default_dir_changed;
  wrapper->tp.dir_closed = default_dir_closed;

  wrapper->tp.file_opened = default_file_opened;
  wrapper->tp.file_added = default_file_added;
  wrapper->tp.file_deleted = default_file_deleted;
  wrapper->tp.file_changed = default_file_changed;
  wrapper->tp.file_closed = default_file_closed;

  wrapper->tp.node_absent = default_node_absent;


  return &wrapper->tp;
}

/* Baton for the reversing wrapper: forwards every event to PROCESSOR
   with left and right swapped, optionally prefixing relpaths with
   PREFIX_RELPATH. */
struct reverse_tree_baton_t
{
  const svn_diff_tree_processor_t *processor;
  const char *prefix_relpath;
};

/* Reversed dir_opened: swap left/right; a copy cannot be reversed,
   so the copyfrom source is dropped. */
static svn_error_t *
reverse_dir_opened(void **new_dir_baton,
                   svn_boolean_t *skip,
                   svn_boolean_t *skip_children,
                   const char *relpath,
                   const svn_diff_source_t *left_source,
                   const svn_diff_source_t *right_source,
                   const svn_diff_source_t *copyfrom_source,
                   void *parent_dir_baton,
                   const svn_diff_tree_processor_t *processor,
                   apr_pool_t *result_pool,
                   apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  SVN_ERR(rb->processor->dir_opened(new_dir_baton, skip, skip_children,
                                    relpath,
                                    right_source, left_source,
                                    NULL /* copyfrom */,
                                    parent_dir_baton,
                                    rb->processor,
                                    result_pool, scratch_pool));
  return SVN_NO_ERROR;
}

/* Reversed dir_added: an add becomes a delete of the right source. */
static svn_error_t *
reverse_dir_added(const char *relpath,
                  const svn_diff_source_t *copyfrom_source,
                  const svn_diff_source_t *right_source,
                  /*const*/ apr_hash_t *copyfrom_props,
                  /*const*/ apr_hash_t *right_props,
                  void *dir_baton,
                  const svn_diff_tree_processor_t *processor,
                  apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  SVN_ERR(rb->processor->dir_deleted(relpath,
                                     right_source,
                                     right_props,
                                     dir_baton,
                                     rb->processor,
                                     scratch_pool));

  return SVN_NO_ERROR;
}

/* Reversed dir_deleted: a delete becomes an add of the left source. */
static svn_error_t *
reverse_dir_deleted(const char *relpath,
                    const svn_diff_source_t *left_source,
                    /*const*/ apr_hash_t *left_props,
                    void *dir_baton,
                    const svn_diff_tree_processor_t *processor,
                    apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  SVN_ERR(rb->processor->dir_added(relpath,
                                   NULL,
                                   left_source,
                                   NULL,
                                   left_props,
                                   dir_baton,
                                   rb->processor,
                                   scratch_pool));
  return SVN_NO_ERROR;
}

/* Reversed dir_changed: swap sources/props and recompute the prop
   delta in the opposite direction. */
static svn_error_t *
reverse_dir_changed(const char *relpath,
                    const svn_diff_source_t *left_source,
                    const svn_diff_source_t *right_source,
                    /*const*/ apr_hash_t *left_props,
                    /*const*/ apr_hash_t *right_props,
                    const apr_array_header_t *prop_changes,
                    void *dir_baton,
                    const struct svn_diff_tree_processor_t *processor,
                    apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;
  apr_array_header_t *reversed_prop_changes = NULL;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  if (prop_changes)
    {
      SVN_ERR_ASSERT(left_props != NULL && right_props != NULL);
      SVN_ERR(svn_prop_diffs(&reversed_prop_changes, left_props, right_props,
                             scratch_pool));
    }

  SVN_ERR(rb->processor->dir_changed(relpath,
                                     right_source,
                                     left_source,
                                     right_props,
                                     left_props,
                                     reversed_prop_changes,
                                     dir_baton,
                                     rb->processor,
                                     scratch_pool));
  return SVN_NO_ERROR;
}

/* Reversed dir_closed: swap left and right sources. */
static svn_error_t *
reverse_dir_closed(const char *relpath,
                   const svn_diff_source_t *left_source,
                   const svn_diff_source_t *right_source,
                   void *dir_baton,
                   const svn_diff_tree_processor_t *processor,
                   apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  SVN_ERR(rb->processor->dir_closed(relpath,
                                    right_source,
                                    left_source,
                                    dir_baton,
                                    rb->processor,
                                    scratch_pool));
  return SVN_NO_ERROR;
}

/* Reversed file_opened: swap left/right; copyfrom is dropped. */
static svn_error_t *
reverse_file_opened(void **new_file_baton,
                    svn_boolean_t *skip,
                    const char *relpath,
                    const svn_diff_source_t *left_source,
                    const svn_diff_source_t *right_source,
                    const svn_diff_source_t *copyfrom_source,
                    void *dir_baton,
                    const svn_diff_tree_processor_t *processor,
                    apr_pool_t *result_pool,
                    apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  SVN_ERR(rb->processor->file_opened(new_file_baton,
                                     skip,
                                     relpath,
                                     right_source,
                                     left_source,
                                     NULL /* copy_from */,
                                     dir_baton,
                                     rb->processor,
                                     result_pool,
                                     scratch_pool));
  return SVN_NO_ERROR;
}

/* Reversed file_added: an add becomes a delete of the right source. */
static svn_error_t *
reverse_file_added(const char *relpath,
                   const svn_diff_source_t *copyfrom_source,
                   const svn_diff_source_t *right_source,
                   const char *copyfrom_file,
                   const char *right_file,
                   /*const*/ apr_hash_t *copyfrom_props,
                   /*const*/ apr_hash_t *right_props,
                   void *file_baton,
                   const svn_diff_tree_processor_t *processor,
                   apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  SVN_ERR(rb->processor->file_deleted(relpath,
                                      right_source,
                                      right_file,
                                      right_props,
                                      file_baton,
                                      rb->processor,
                                      scratch_pool));
  return SVN_NO_ERROR;
}

/* Reversed file_deleted: a delete becomes an add of the left source. */
static svn_error_t *
reverse_file_deleted(const char *relpath,
                     const svn_diff_source_t *left_source,
                     const char *left_file,
                     /*const*/ apr_hash_t *left_props,
                     void *file_baton,
                     const svn_diff_tree_processor_t *processor,
                     apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  SVN_ERR(rb->processor->file_added(relpath,
                                    NULL /* copyfrom src */,
                                    left_source,
                                    NULL /* copyfrom file */,
                                    left_file,
                                    NULL /* copyfrom props */,
                                    left_props,
                                    file_baton,
                                    rb->processor,
                                    scratch_pool));
  return SVN_NO_ERROR;
}

/* Reversed file_changed: swap sources/files/props and recompute the
   prop delta in the opposite direction. */
static svn_error_t *
reverse_file_changed(const char *relpath,
                     const svn_diff_source_t *left_source,
                     const svn_diff_source_t *right_source,
                     const char *left_file,
                     const char *right_file,
                     /*const*/ apr_hash_t *left_props,
                     /*const*/ apr_hash_t *right_props,
                     svn_boolean_t file_modified,
                     const apr_array_header_t *prop_changes,
                     void *file_baton,
                     const svn_diff_tree_processor_t *processor,
                     apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;
  apr_array_header_t *reversed_prop_changes = NULL;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  if (prop_changes)
    {
      SVN_ERR_ASSERT(left_props != NULL && right_props != NULL);
      SVN_ERR(svn_prop_diffs(&reversed_prop_changes, left_props, right_props,
                             scratch_pool));
    }

  SVN_ERR(rb->processor->file_changed(relpath,
                                      right_source,
                                      left_source,
                                      right_file,
                                      left_file,
                                      right_props,
                                      left_props,
                                      file_modified,
                                      reversed_prop_changes,
                                      file_baton,
                                      rb->processor,
                                      scratch_pool));
  return SVN_NO_ERROR;
}

/* Reversed file_closed: swap left and right sources. */
static svn_error_t *
reverse_file_closed(const char *relpath,
                    const svn_diff_source_t *left_source,
                    const svn_diff_source_t *right_source,
                    void *file_baton,
                    const svn_diff_tree_processor_t *processor,
                    apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  SVN_ERR(rb->processor->file_closed(relpath,
                                     right_source,
                                     left_source,
                                     file_baton,
                                     rb->processor,
                                     scratch_pool));

  return SVN_NO_ERROR;
}

/* Reversed node_absent: forward unchanged (no direction to swap). */
static svn_error_t *
reverse_node_absent(const char *relpath,
                    void *dir_baton,
                    const svn_diff_tree_processor_t *processor,
                    apr_pool_t *scratch_pool)
{
  struct reverse_tree_baton_t *rb = processor->baton;

  if (rb->prefix_relpath)
    relpath = svn_relpath_join(rb->prefix_relpath, relpath, scratch_pool);

  SVN_ERR(rb->processor->node_absent(relpath,
                                     dir_baton,
                                     rb->processor,
                                     scratch_pool));
  return SVN_NO_ERROR;
}


const svn_diff_tree_processor_t *
svn_diff__tree_processor_reverse_create(const svn_diff_tree_processor_t * processor,
                                        const char *prefix_relpath,
                                        apr_pool_t *result_pool)
{
  struct reverse_tree_baton_t *rb;
  svn_diff_tree_processor_t *reverse;

  rb = apr_pcalloc(result_pool, sizeof(*rb));
  rb->processor = processor;
  if (prefix_relpath)
    rb->prefix_relpath = apr_pstrdup(result_pool, prefix_relpath);

  reverse = svn_diff__tree_processor_create(rb, result_pool);

  reverse->dir_opened = reverse_dir_opened;
  reverse->dir_added = reverse_dir_added;
  reverse->dir_deleted = reverse_dir_deleted;
  reverse->dir_changed = reverse_dir_changed;
  reverse->dir_closed = reverse_dir_closed;

  reverse->file_opened = reverse_file_opened;
  reverse->file_added = reverse_file_added;
  reverse->file_deleted = reverse_file_deleted;
  reverse->file_changed = reverse_file_changed;
  reverse->file_closed = reverse_file_closed;

  reverse->node_absent = reverse_node_absent;

  return reverse;
}

/* Baton for the filtering wrapper: forwards only nodes at or below
   PREFIX_RELPATH, with that prefix stripped from relpaths. */
struct filter_tree_baton_t
{
  const svn_diff_tree_processor_t *processor;
  const char *prefix_relpath;
};

/* Filtered dir_opened: skip (but still descend into) directories
   outside the configured prefix. */
static svn_error_t *
filter_dir_opened(void **new_dir_baton,
                  svn_boolean_t *skip,
                  svn_boolean_t *skip_children,
                  const char *relpath,
                  const svn_diff_source_t *left_source,
                  const svn_diff_source_t *right_source,
                  const svn_diff_source_t *copyfrom_source,
                  void *parent_dir_baton,
                  const svn_diff_tree_processor_t *processor,
                  apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
{
  struct filter_tree_baton_t *fb = processor->baton;

  relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath);

  if (!
relpath)
    {
      /* Skip work for this, but NOT for DESCENDANTS */
      *skip = TRUE;
      return SVN_NO_ERROR;
    }

  SVN_ERR(fb->processor->dir_opened(new_dir_baton, skip, skip_children,
                                    relpath,
                                    left_source, right_source,
                                    copyfrom_source,
                                    parent_dir_baton,
                                    fb->processor,
                                    result_pool, scratch_pool));
  return SVN_NO_ERROR;
}

/* Filtered dir_added: strip the prefix and forward. */
static svn_error_t *
filter_dir_added(const char *relpath,
                 const svn_diff_source_t *copyfrom_source,
                 const svn_diff_source_t *right_source,
                 /*const*/ apr_hash_t *copyfrom_props,
                 /*const*/ apr_hash_t *right_props,
                 void *dir_baton,
                 const svn_diff_tree_processor_t *processor,
                 apr_pool_t *scratch_pool)
{
  struct filter_tree_baton_t *fb = processor->baton;

  relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath);
  assert(relpath != NULL); /* Driver error */

  SVN_ERR(fb->processor->dir_added(relpath,
                                   copyfrom_source,
                                   right_source,
                                   copyfrom_props,
                                   right_props,
                                   dir_baton,
                                   fb->processor,
                                   scratch_pool));

  return SVN_NO_ERROR;
}

/* Filtered dir_deleted: strip the prefix and forward. */
static svn_error_t *
filter_dir_deleted(const char *relpath,
                   const svn_diff_source_t *left_source,
                   /*const*/ apr_hash_t *left_props,
                   void *dir_baton,
                   const svn_diff_tree_processor_t *processor,
                   apr_pool_t *scratch_pool)
{
  struct filter_tree_baton_t *fb = processor->baton;

  relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath);
  assert(relpath != NULL); /* Driver error */

  SVN_ERR(fb->processor->dir_deleted(relpath,
                                     left_source,
                                     left_props,
                                     dir_baton,
                                     fb->processor,
                                     scratch_pool));

  return SVN_NO_ERROR;
}

/* Filtered dir_changed: strip the prefix and forward. */
static svn_error_t *
filter_dir_changed(const char *relpath,
                   const svn_diff_source_t *left_source,
                   const svn_diff_source_t *right_source,
                   /*const*/ apr_hash_t *left_props,
                   /*const*/ apr_hash_t *right_props,
                   const apr_array_header_t *prop_changes,
                   void *dir_baton,
                   const struct svn_diff_tree_processor_t *processor,
                   apr_pool_t *scratch_pool)
{
  struct filter_tree_baton_t *fb = processor->baton;

  relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath);
  assert(relpath != NULL); /* Driver error */

  SVN_ERR(fb->processor->dir_changed(relpath,
                                     left_source,
                                     right_source,
                                     left_props,
                                     right_props,
                                     prop_changes,
                                     dir_baton,
                                     fb->processor,
                                     scratch_pool));
  return SVN_NO_ERROR;
}

/* Filtered dir_closed: strip the prefix and forward. */
static svn_error_t *
filter_dir_closed(const char *relpath,
                  const svn_diff_source_t *left_source,
                  const svn_diff_source_t *right_source,
                  void *dir_baton,
                  const svn_diff_tree_processor_t *processor,
                  apr_pool_t *scratch_pool)
{
  struct filter_tree_baton_t *fb = processor->baton;

  relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath);
  assert(relpath != NULL); /* Driver error */

  SVN_ERR(fb->processor->dir_closed(relpath,
                                    left_source,
                                    right_source,
                                    dir_baton,
                                    fb->processor,
                                    scratch_pool));
  return SVN_NO_ERROR;
}

/* Filtered file_opened: skip files outside the configured prefix. */
static svn_error_t *
filter_file_opened(void **new_file_baton,
                   svn_boolean_t *skip,
                   const char *relpath,
                   const svn_diff_source_t *left_source,
                   const svn_diff_source_t *right_source,
                   const svn_diff_source_t *copyfrom_source,
                   void *dir_baton,
                   const svn_diff_tree_processor_t *processor,
                   apr_pool_t *result_pool,
                   apr_pool_t *scratch_pool)
{
  struct filter_tree_baton_t *fb = processor->baton;

  relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath);

  if (!relpath)
    {
      *skip = TRUE;
      return SVN_NO_ERROR;
    }

  SVN_ERR(fb->processor->file_opened(new_file_baton,
                                     skip,
                                     relpath,
                                     left_source,
                                     right_source,
                                     copyfrom_source,
                                     dir_baton,
                                     fb->processor,
                                     result_pool,
                                     scratch_pool));
  return SVN_NO_ERROR;
}

/* Filtered file_added: strip the prefix and forward. */
static svn_error_t *
filter_file_added(const char *relpath,
                  const svn_diff_source_t *copyfrom_source,
                  const svn_diff_source_t *right_source,
                  const char *copyfrom_file,
                  const char *right_file,
                  /*const*/ apr_hash_t *copyfrom_props,
                  /*const*/ apr_hash_t *right_props,
                  void *file_baton,
                  const svn_diff_tree_processor_t *processor,
                  apr_pool_t *scratch_pool)
{
  struct filter_tree_baton_t *fb = processor->baton;

  relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath);
  assert(relpath != NULL); /* Driver error */

  SVN_ERR(fb->processor->file_added(relpath,
                                    copyfrom_source,
                                    right_source,
                                    copyfrom_file,
                                    right_file,
                                    copyfrom_props,
                                    right_props,
                                    file_baton,
                                    fb->processor,
                                    scratch_pool));
  return SVN_NO_ERROR;
}

/* Filtered file_deleted: strip the prefix and forward. */
static svn_error_t *
filter_file_deleted(const char *relpath,
                    const svn_diff_source_t *left_source,
                    const char *left_file,
                    /*const*/ apr_hash_t *left_props,
                    void *file_baton,
                    const svn_diff_tree_processor_t *processor,
                    apr_pool_t *scratch_pool)
{
  struct filter_tree_baton_t *fb = processor->baton;

  relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath);
  assert(relpath != NULL); /* Driver error */

  SVN_ERR(fb->processor->file_deleted(relpath,
                                      left_source,
                                      left_file,
                                      left_props,
                                      file_baton,
                                      fb->processor,
                                      scratch_pool));

  return SVN_NO_ERROR;
}

/* Filtered file_changed: strip the prefix and forward.
   (Continues beyond this excerpt.) */
static svn_error_t *
filter_file_changed(const char *relpath,
                    const svn_diff_source_t *left_source,
                    const svn_diff_source_t *right_source,
                    const char *left_file,
                    const char *right_file,
                    /*const*/ apr_hash_t *left_props,
                    /*const*/ apr_hash_t *right_props,
                    svn_boolean_t file_modified,
                    const apr_array_header_t *prop_changes,
                    void
*file_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct filter_tree_baton_t *fb = processor->baton; + + relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath); + assert(relpath != NULL); /* Driver error */ + + SVN_ERR(fb->processor->file_changed(relpath, + left_source, + right_source, + left_file, + right_file, + left_props, + right_props, + file_modified, + prop_changes, + file_baton, + fb->processor, + scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +filter_file_closed(const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + void *file_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct filter_tree_baton_t *fb = processor->baton; + + relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath); + assert(relpath != NULL); /* Driver error */ + + SVN_ERR(fb->processor->file_closed(relpath, + left_source, + right_source, + file_baton, + fb->processor, + scratch_pool)); + + return SVN_NO_ERROR; +} + +static svn_error_t * +filter_node_absent(const char *relpath, + void *dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct filter_tree_baton_t *fb = processor->baton; + + relpath = svn_relpath_skip_ancestor(fb->prefix_relpath, relpath); + assert(relpath != NULL); /* Driver error */ + + SVN_ERR(fb->processor->node_absent(relpath, + dir_baton, + fb->processor, + scratch_pool)); + return SVN_NO_ERROR; +} + + +const svn_diff_tree_processor_t * +svn_diff__tree_processor_filter_create(const svn_diff_tree_processor_t * processor, + const char *prefix_relpath, + apr_pool_t *result_pool) +{ + struct filter_tree_baton_t *fb; + svn_diff_tree_processor_t *filter; + + fb = apr_pcalloc(result_pool, sizeof(*fb)); + fb->processor = processor; + if (prefix_relpath) + fb->prefix_relpath = apr_pstrdup(result_pool, prefix_relpath); + + filter = svn_diff__tree_processor_create(fb, 
result_pool); + + filter->dir_opened = filter_dir_opened; + filter->dir_added = filter_dir_added; + filter->dir_deleted = filter_dir_deleted; + filter->dir_changed = filter_dir_changed; + filter->dir_closed = filter_dir_closed; + + filter->file_opened = filter_file_opened; + filter->file_added = filter_file_added; + filter->file_deleted = filter_file_deleted; + filter->file_changed = filter_file_changed; + filter->file_closed = filter_file_closed; + + filter->node_absent = filter_node_absent; + + return filter; +} + +struct copy_as_changed_baton_t +{ + const svn_diff_tree_processor_t *processor; +}; + +static svn_error_t * +copy_as_changed_dir_opened(void **new_dir_baton, + svn_boolean_t *skip, + svn_boolean_t *skip_children, + const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + const svn_diff_source_t *copyfrom_source, + void *parent_dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + if (!left_source && copyfrom_source) + { + assert(right_source != NULL); + + left_source = copyfrom_source; + copyfrom_source = NULL; + } + + SVN_ERR(cb->processor->dir_opened(new_dir_baton, skip, skip_children, + relpath, + left_source, right_source, + copyfrom_source, + parent_dir_baton, + cb->processor, + result_pool, scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +copy_as_changed_dir_added(const char *relpath, + const svn_diff_source_t *copyfrom_source, + const svn_diff_source_t *right_source, + /*const*/ apr_hash_t *copyfrom_props, + /*const*/ apr_hash_t *right_props, + void *dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + if (copyfrom_source) + { + apr_array_header_t *propchanges; + SVN_ERR(svn_prop_diffs(&propchanges, right_props, copyfrom_props, + scratch_pool)); + 
SVN_ERR(cb->processor->dir_changed(relpath, + copyfrom_source, + right_source, + copyfrom_props, + right_props, + propchanges, + dir_baton, + cb->processor, + scratch_pool)); + } + else + { + SVN_ERR(cb->processor->dir_added(relpath, + copyfrom_source, + right_source, + copyfrom_props, + right_props, + dir_baton, + cb->processor, + scratch_pool)); + } + + return SVN_NO_ERROR; +} + +static svn_error_t * +copy_as_changed_dir_deleted(const char *relpath, + const svn_diff_source_t *left_source, + /*const*/ apr_hash_t *left_props, + void *dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + SVN_ERR(cb->processor->dir_deleted(relpath, + left_source, + left_props, + dir_baton, + cb->processor, + scratch_pool)); + + return SVN_NO_ERROR; +} + +static svn_error_t * +copy_as_changed_dir_changed(const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + /*const*/ apr_hash_t *left_props, + /*const*/ apr_hash_t *right_props, + const apr_array_header_t *prop_changes, + void *dir_baton, + const struct svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + SVN_ERR(cb->processor->dir_changed(relpath, + left_source, + right_source, + left_props, + right_props, + prop_changes, + dir_baton, + cb->processor, + scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +copy_as_changed_dir_closed(const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + void *dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + SVN_ERR(cb->processor->dir_closed(relpath, + left_source, + right_source, + dir_baton, + cb->processor, + scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +copy_as_changed_file_opened(void 
**new_file_baton, + svn_boolean_t *skip, + const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + const svn_diff_source_t *copyfrom_source, + void *dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + if (!left_source && copyfrom_source) + { + assert(right_source != NULL); + + left_source = copyfrom_source; + copyfrom_source = NULL; + } + + SVN_ERR(cb->processor->file_opened(new_file_baton, + skip, + relpath, + left_source, + right_source, + copyfrom_source, + dir_baton, + cb->processor, + result_pool, + scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +copy_as_changed_file_added(const char *relpath, + const svn_diff_source_t *copyfrom_source, + const svn_diff_source_t *right_source, + const char *copyfrom_file, + const char *right_file, + /*const*/ apr_hash_t *copyfrom_props, + /*const*/ apr_hash_t *right_props, + void *file_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + if (copyfrom_source) + { + apr_array_header_t *propchanges; + svn_boolean_t same; + SVN_ERR(svn_prop_diffs(&propchanges, right_props, copyfrom_props, + scratch_pool)); + + /* "" is sometimes a marker for just modified (E.g. 
no-textdeltas), + and it is certainly not a file */ + if (*copyfrom_file && *right_file) + { + SVN_ERR(svn_io_files_contents_same_p(&same, copyfrom_file, + right_file, scratch_pool)); + } + else + same = FALSE; + + SVN_ERR(cb->processor->file_changed(relpath, + copyfrom_source, + right_source, + copyfrom_file, + right_file, + copyfrom_props, + right_props, + !same, + propchanges, + file_baton, + cb->processor, + scratch_pool)); + } + else + { + SVN_ERR(cb->processor->file_added(relpath, + copyfrom_source, + right_source, + copyfrom_file, + right_file, + copyfrom_props, + right_props, + file_baton, + cb->processor, + scratch_pool)); + } + return SVN_NO_ERROR; +} + +static svn_error_t * +copy_as_changed_file_deleted(const char *relpath, + const svn_diff_source_t *left_source, + const char *left_file, + /*const*/ apr_hash_t *left_props, + void *file_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + SVN_ERR(cb->processor->file_deleted(relpath, + left_source, + left_file, + left_props, + file_baton, + cb->processor, + scratch_pool)); + + return SVN_NO_ERROR; +} + +static svn_error_t * +copy_as_changed_file_changed(const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + const char *left_file, + const char *right_file, + /*const*/ apr_hash_t *left_props, + /*const*/ apr_hash_t *right_props, + svn_boolean_t file_modified, + const apr_array_header_t *prop_changes, + void *file_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + SVN_ERR(cb->processor->file_changed(relpath, + left_source, + right_source, + left_file, + right_file, + left_props, + right_props, + file_modified, + prop_changes, + file_baton, + cb->processor, + scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +copy_as_changed_file_closed(const char *relpath, + 
const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + void *file_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + SVN_ERR(cb->processor->file_closed(relpath, + left_source, + right_source, + file_baton, + cb->processor, + scratch_pool)); + + return SVN_NO_ERROR; +} + +static svn_error_t * +copy_as_changed_node_absent(const char *relpath, + void *dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct copy_as_changed_baton_t *cb = processor->baton; + + SVN_ERR(cb->processor->node_absent(relpath, + dir_baton, + cb->processor, + scratch_pool)); + return SVN_NO_ERROR; +} + + +const svn_diff_tree_processor_t * +svn_diff__tree_processor_copy_as_changed_create( + const svn_diff_tree_processor_t * processor, + apr_pool_t *result_pool) +{ + struct copy_as_changed_baton_t *cb; + svn_diff_tree_processor_t *filter; + + cb = apr_pcalloc(result_pool, sizeof(*cb)); + cb->processor = processor; + + filter = svn_diff__tree_processor_create(cb, result_pool); + filter->dir_opened = copy_as_changed_dir_opened; + filter->dir_added = copy_as_changed_dir_added; + filter->dir_deleted = copy_as_changed_dir_deleted; + filter->dir_changed = copy_as_changed_dir_changed; + filter->dir_closed = copy_as_changed_dir_closed; + + filter->file_opened = copy_as_changed_file_opened; + filter->file_added = copy_as_changed_file_added; + filter->file_deleted = copy_as_changed_file_deleted; + filter->file_changed = copy_as_changed_file_changed; + filter->file_closed = copy_as_changed_file_closed; + + filter->node_absent = copy_as_changed_node_absent; + + return filter; +} + + +/* Processor baton for the tee tree processor */ +struct tee_baton_t +{ + const svn_diff_tree_processor_t *p1; + const svn_diff_tree_processor_t *p2; +}; + +/* Wrapper baton for file and directory batons in the tee processor */ +struct tee_node_baton_t +{ + void *baton1; 
+ void *baton2; +}; + +static svn_error_t * +tee_dir_opened(void **new_dir_baton, + svn_boolean_t *skip, + svn_boolean_t *skip_children, + const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + const svn_diff_source_t *copyfrom_source, + void *parent_dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *pb = parent_dir_baton; + struct tee_node_baton_t *nb = apr_pcalloc(result_pool, sizeof(*nb)); + + SVN_ERR(tb->p1->dir_opened(&(nb->baton1), + skip, + skip_children, + relpath, + left_source, + right_source, + copyfrom_source, + pb ? pb->baton1 : NULL, + tb->p1, + result_pool, + scratch_pool)); + + SVN_ERR(tb->p2->dir_opened(&(nb->baton2), + skip, + skip_children, + relpath, + left_source, + right_source, + copyfrom_source, + pb ? pb->baton2 : NULL, + tb->p2, + result_pool, + scratch_pool)); + + *new_dir_baton = nb; + + return SVN_NO_ERROR; +} + +static svn_error_t * +tee_dir_added(const char *relpath, + const svn_diff_source_t *copyfrom_source, + const svn_diff_source_t *right_source, + /*const*/ apr_hash_t *copyfrom_props, + /*const*/ apr_hash_t *right_props, + void *dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *db = dir_baton; + + SVN_ERR(tb->p1->dir_added(relpath, + copyfrom_source, + right_source, + copyfrom_props, + right_props, + db->baton1, + tb->p1, + scratch_pool)); + + SVN_ERR(tb->p2->dir_added(relpath, + copyfrom_source, + right_source, + copyfrom_props, + right_props, + db->baton2, + tb->p2, + scratch_pool)); + + return SVN_NO_ERROR; +} + +static svn_error_t * +tee_dir_deleted(const char *relpath, + const svn_diff_source_t *left_source, + /*const*/ apr_hash_t *left_props, + void *dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) 
+{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *db = dir_baton; + + SVN_ERR(tb->p1->dir_deleted(relpath, + left_source, + left_props, + db->baton1, + tb->p1, + scratch_pool)); + + SVN_ERR(tb->p2->dir_deleted(relpath, + left_source, + left_props, + db->baton2, + tb->p2, + scratch_pool)); + + return SVN_NO_ERROR; +} + +static svn_error_t * +tee_dir_changed(const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + /*const*/ apr_hash_t *left_props, + /*const*/ apr_hash_t *right_props, + const apr_array_header_t *prop_changes, + void *dir_baton, + const struct svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *db = dir_baton; + + SVN_ERR(tb->p1->dir_changed(relpath, + left_source, + right_source, + left_props, + right_props, + prop_changes, + db->baton1, + tb->p1, + scratch_pool)); + + SVN_ERR(tb->p2->dir_changed(relpath, + left_source, + right_source, + left_props, + right_props, + prop_changes, + db->baton2, + tb->p2, + scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +tee_dir_closed(const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + void *dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *db = dir_baton; + + SVN_ERR(tb->p1->dir_closed(relpath, + left_source, + right_source, + db->baton1, + tb->p1, + scratch_pool)); + + SVN_ERR(tb->p2->dir_closed(relpath, + left_source, + right_source, + db->baton2, + tb->p2, + scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +tee_file_opened(void **new_file_baton, + svn_boolean_t *skip, + const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + const svn_diff_source_t *copyfrom_source, + void *dir_baton, + const 
svn_diff_tree_processor_t *processor, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *pb = dir_baton; + struct tee_node_baton_t *nb = apr_pcalloc(result_pool, sizeof(*nb)); + + SVN_ERR(tb->p1->file_opened(&(nb->baton1), + skip, + relpath, + left_source, + right_source, + copyfrom_source, + pb ? pb->baton1 : NULL, + tb->p1, + result_pool, + scratch_pool)); + + SVN_ERR(tb->p2->file_opened(&(nb->baton2), + skip, + relpath, + left_source, + right_source, + copyfrom_source, + pb ? pb->baton2 : NULL, + tb->p2, + result_pool, + scratch_pool)); + + *new_file_baton = nb; + + return SVN_NO_ERROR; +} + +static svn_error_t * +tee_file_added(const char *relpath, + const svn_diff_source_t *copyfrom_source, + const svn_diff_source_t *right_source, + const char *copyfrom_file, + const char *right_file, + /*const*/ apr_hash_t *copyfrom_props, + /*const*/ apr_hash_t *right_props, + void *file_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *fb = file_baton; + + SVN_ERR(tb->p1->file_added(relpath, + copyfrom_source, + right_source, + copyfrom_file, + right_file, + copyfrom_props, + right_props, + fb->baton1, + tb->p1, + scratch_pool)); + + SVN_ERR(tb->p2->file_added(relpath, + copyfrom_source, + right_source, + copyfrom_file, + right_file, + copyfrom_props, + right_props, + fb->baton2, + tb->p2, + scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +tee_file_deleted(const char *relpath, + const svn_diff_source_t *left_source, + const char *left_file, + /*const*/ apr_hash_t *left_props, + void *file_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *fb = file_baton; + + SVN_ERR(tb->p1->file_deleted(relpath, + left_source, + left_file, + left_props, + fb->baton1, + tb->p1, + 
scratch_pool)); + + SVN_ERR(tb->p2->file_deleted(relpath, + left_source, + left_file, + left_props, + fb->baton2, + tb->p2, + scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +tee_file_changed(const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + const char *left_file, + const char *right_file, + /*const*/ apr_hash_t *left_props, + /*const*/ apr_hash_t *right_props, + svn_boolean_t file_modified, + const apr_array_header_t *prop_changes, + void *file_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *fb = file_baton; + + SVN_ERR(tb->p1->file_changed(relpath, + left_source, + right_source, + left_file, + right_file, + left_props, + right_props, + file_modified, + prop_changes, + fb->baton1, + tb->p1, + scratch_pool)); + + SVN_ERR(tb->p2->file_changed(relpath, + left_source, + right_source, + left_file, + right_file, + left_props, + right_props, + file_modified, + prop_changes, + fb->baton2, + tb->p2, + scratch_pool)); + return SVN_NO_ERROR; +} + +static svn_error_t * +tee_file_closed(const char *relpath, + const svn_diff_source_t *left_source, + const svn_diff_source_t *right_source, + void *file_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *fb = file_baton; + + SVN_ERR(tb->p1->file_closed(relpath, + left_source, + right_source, + fb->baton1, + tb->p1, + scratch_pool)); + + SVN_ERR(tb->p2->file_closed(relpath, + left_source, + right_source, + fb->baton2, + tb->p2, + scratch_pool)); + + return SVN_NO_ERROR; +} + +static svn_error_t * +tee_node_absent(const char *relpath, + void *dir_baton, + const svn_diff_tree_processor_t *processor, + apr_pool_t *scratch_pool) +{ + struct tee_baton_t *tb = processor->baton; + struct tee_node_baton_t *db = dir_baton; + + 
SVN_ERR(tb->p1->node_absent(relpath, + db ? db->baton1 : NULL, + tb->p1, + scratch_pool)); + + SVN_ERR(tb->p2->node_absent(relpath, + db ? db->baton2 : NULL, + tb->p2, + scratch_pool)); + + return SVN_NO_ERROR; +} + +const svn_diff_tree_processor_t * +svn_diff__tree_processor_tee_create(const svn_diff_tree_processor_t *processor1, + const svn_diff_tree_processor_t *processor2, + apr_pool_t *result_pool) +{ + struct tee_baton_t *tb = apr_pcalloc(result_pool, sizeof(*tb)); + svn_diff_tree_processor_t *tee; + tb->p1 = processor1; + tb->p2 = processor2; + + tee = svn_diff__tree_processor_create(tb, result_pool); + + tee->dir_opened = tee_dir_opened; + tee->dir_added = tee_dir_added; + tee->dir_deleted = tee_dir_deleted; + tee->dir_changed = tee_dir_changed; + tee->dir_closed = tee_dir_closed; + tee->file_opened = tee_file_opened; + tee->file_added = tee_file_added; + tee->file_deleted = tee_file_deleted; + tee->file_changed = tee_file_changed; + tee->file_closed = tee_file_closed; + tee->node_absent = tee_node_absent; + + return tee; +} + +svn_diff_source_t * +svn_diff__source_create(svn_revnum_t revision, + apr_pool_t *result_pool) +{ + svn_diff_source_t *src = apr_pcalloc(result_pool, sizeof(*src)); + + src->revision = revision; + return src; +} diff --git a/subversion/libsvn_diff/lcs.c b/subversion/libsvn_diff/lcs.c new file mode 100644 index 0000000..8087a92 --- /dev/null +++ b/subversion/libsvn_diff/lcs.c @@ -0,0 +1,375 @@ +/* + * lcs.c : routines for creating an lcs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + + +#include <apr.h> +#include <apr_pools.h> +#include <apr_general.h> + +#include "diff.h" + + +/* + * Calculate the Longest Common Subsequence (LCS) between two datasources. + * This function is what makes the diff code tick. + * + * The LCS algorithm implemented here is based on the approach described + * by Sun Wu, Udi Manber and Gene Meyers in "An O(NP) Sequence Comparison + * Algorithm", but has been modified for better performance. + * + * Let M and N be the lengths (number of tokens) of the two sources + * ('files'). The goal is to reach the end of both sources (files) with the + * minimum number of insertions + deletions. Since there is a known length + * difference N-M between the files, that is equivalent to just the minimum + * number of deletions, or equivalently the minimum number of insertions. + * For symmetry, we use the lesser number - deletions if M<N, insertions if + * M>N. + * + * Let 'k' be the difference in remaining length between the files, i.e. + * if we're at the beginning of both files, k=N-M, whereas k=0 for the + * 'end state', at the end of both files. An insertion will increase k by + * one, while a deletion decreases k by one. If k<0, then insertions are + * 'free' - we need those to reach the end state k=0 anyway - but deletions + * are costly: Adding a deletion means that we will have to add an additional + * insertion later to reach the end state, so it doesn't matter if we count + * deletions or insertions. 
Similarly, deletions are free for k>0. + * + * Let a 'state' be a given position in each file {pos1, pos2}. An array + * 'fp' keeps track of the best possible state (largest values of + * {pos1, pos2}) that can be achieved for a given cost 'p' (# moves away + * from k=0), as well as a linked list of what matches were used to reach + * that state. For each new value of p, we find for each value of k the + * best achievable state for that k - either by doing a costly operation + * (deletion if k<0) from a state achieved at a lower p, or doing a free + * operation (insertion if k<0) from a state achieved at the same p - + * and in both cases advancing past any matching regions found. This is + * handled by running loops over k in order of descending absolute value. + * + * A recent improvement of the algorithm is to ignore tokens that are unique + * to one file or the other, as those are known from the start to be + * impossible to match. + */ + +typedef struct svn_diff__snake_t svn_diff__snake_t; + +struct svn_diff__snake_t +{ + apr_off_t y; + svn_diff__lcs_t *lcs; + svn_diff__position_t *position[2]; +}; + +static APR_INLINE void +svn_diff__snake(svn_diff__snake_t *fp_k, + svn_diff__token_index_t *token_counts[2], + svn_diff__lcs_t **freelist, + apr_pool_t *pool) +{ + svn_diff__position_t *start_position[2]; + svn_diff__position_t *position[2]; + svn_diff__lcs_t *lcs; + svn_diff__lcs_t *previous_lcs; + + /* The previous entry at fp[k] is going to be replaced. See if we + * can mark that lcs node for reuse, because the sequence up to this + * point was a dead end. 
+ */ + lcs = fp_k[0].lcs; + while (lcs) + { + lcs->refcount--; + if (lcs->refcount) + break; + + previous_lcs = lcs->next; + lcs->next = *freelist; + *freelist = lcs; + lcs = previous_lcs; + } + + if (fp_k[-1].y >= fp_k[1].y) + { + start_position[0] = fp_k[-1].position[0]; + start_position[1] = fp_k[-1].position[1]->next; + + previous_lcs = fp_k[-1].lcs; + } + else + { + start_position[0] = fp_k[1].position[0]->next; + start_position[1] = fp_k[1].position[1]; + + previous_lcs = fp_k[1].lcs; + } + + + if (previous_lcs) + { + previous_lcs->refcount++; + } + + /* ### Optimization, skip all positions that don't have matchpoints + * ### anyway. Beware of the sentinel, don't skip it! + */ + + position[0] = start_position[0]; + position[1] = start_position[1]; + + while (1) + { + while (position[0]->token_index == position[1]->token_index) + { + position[0] = position[0]->next; + position[1] = position[1]->next; + } + + if (position[1] != start_position[1]) + { + lcs = *freelist; + if (lcs) + { + *freelist = lcs->next; + } + else + { + lcs = apr_palloc(pool, sizeof(*lcs)); + } + + lcs->position[0] = start_position[0]; + lcs->position[1] = start_position[1]; + lcs->length = position[1]->offset - start_position[1]->offset; + lcs->next = previous_lcs; + lcs->refcount = 1; + previous_lcs = lcs; + start_position[0] = position[0]; + start_position[1] = position[1]; + } + + /* Skip any and all tokens that only occur in one of the files */ + if (position[0]->token_index >= 0 + && token_counts[1][position[0]->token_index] == 0) + start_position[0] = position[0] = position[0]->next; + else if (position[1]->token_index >= 0 + && token_counts[0][position[1]->token_index] == 0) + start_position[1] = position[1] = position[1]->next; + else + break; + } + + fp_k[0].lcs = previous_lcs; + fp_k[0].position[0] = position[0]; + fp_k[0].position[1] = position[1]; + + fp_k[0].y = position[1]->offset; +} + + +static svn_diff__lcs_t * +svn_diff__lcs_reverse(svn_diff__lcs_t *lcs) +{ + 
svn_diff__lcs_t *next; + svn_diff__lcs_t *prev; + + next = NULL; + while (lcs != NULL) + { + prev = lcs->next; + lcs->next = next; + next = lcs; + lcs = prev; + } + + return next; +} + + +/* Prepends a new lcs chunk for the amount of LINES at the given positions + * POS0_OFFSET and POS1_OFFSET to the given LCS chain, and returns it. + * This function assumes LINES > 0. */ +static svn_diff__lcs_t * +prepend_lcs(svn_diff__lcs_t *lcs, apr_off_t lines, + apr_off_t pos0_offset, apr_off_t pos1_offset, + apr_pool_t *pool) +{ + svn_diff__lcs_t *new_lcs; + + SVN_ERR_ASSERT_NO_RETURN(lines > 0); + + new_lcs = apr_palloc(pool, sizeof(*new_lcs)); + new_lcs->position[0] = apr_pcalloc(pool, sizeof(*new_lcs->position[0])); + new_lcs->position[0]->offset = pos0_offset; + new_lcs->position[1] = apr_pcalloc(pool, sizeof(*new_lcs->position[1])); + new_lcs->position[1]->offset = pos1_offset; + new_lcs->length = lines; + new_lcs->refcount = 1; + new_lcs->next = lcs; + + return new_lcs; +} + + +svn_diff__lcs_t * +svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */ + svn_diff__position_t *position_list2, /* pointer to tail (ring) */ + svn_diff__token_index_t *token_counts_list1, /* array of counts */ + svn_diff__token_index_t *token_counts_list2, /* array of counts */ + svn_diff__token_index_t num_tokens, + apr_off_t prefix_lines, + apr_off_t suffix_lines, + apr_pool_t *pool) +{ + apr_off_t length[2]; + svn_diff__token_index_t *token_counts[2]; + svn_diff__token_index_t unique_count[2]; + svn_diff__token_index_t token_index; + svn_diff__snake_t *fp; + apr_off_t d; + apr_off_t k; + apr_off_t p = 0; + svn_diff__lcs_t *lcs, *lcs_freelist = NULL; + + svn_diff__position_t sentinel_position[2]; + + /* Since EOF is always a sync point we tack on an EOF link + * with sentinel positions + */ + lcs = apr_palloc(pool, sizeof(*lcs)); + lcs->position[0] = apr_pcalloc(pool, sizeof(*lcs->position[0])); + lcs->position[0]->offset = position_list1 + ? 
position_list1->offset + suffix_lines + 1 + : prefix_lines + suffix_lines + 1; + lcs->position[1] = apr_pcalloc(pool, sizeof(*lcs->position[1])); + lcs->position[1]->offset = position_list2 + ? position_list2->offset + suffix_lines + 1 + : prefix_lines + suffix_lines + 1; + lcs->length = 0; + lcs->refcount = 1; + lcs->next = NULL; + + if (position_list1 == NULL || position_list2 == NULL) + { + if (suffix_lines) + lcs = prepend_lcs(lcs, suffix_lines, + lcs->position[0]->offset - suffix_lines, + lcs->position[1]->offset - suffix_lines, + pool); + if (prefix_lines) + lcs = prepend_lcs(lcs, prefix_lines, 1, 1, pool); + + return lcs; + } + + unique_count[1] = unique_count[0] = 0; + for (token_index = 0; token_index < num_tokens; token_index++) + { + if (token_counts_list1[token_index] == 0) + unique_count[1] += token_counts_list2[token_index]; + if (token_counts_list2[token_index] == 0) + unique_count[0] += token_counts_list1[token_index]; + } + + /* Calculate lengths M and N of the sequences to be compared. Do not + * count tokens unique to one file, as those are ignored in __snake. + */ + length[0] = position_list1->offset - position_list1->next->offset + 1 + - unique_count[0]; + length[1] = position_list2->offset - position_list2->next->offset + 1 + - unique_count[1]; + + /* strikerXXX: here we allocate the furthest point array, which is + * strikerXXX: sized M + N + 3 (!) + */ + fp = apr_pcalloc(pool, + sizeof(*fp) * (apr_size_t)(length[0] + length[1] + 3)); + + /* The origo of fp corresponds to the end state, where we are + * at the end of both files. The valid states thus span from + * -N (at end of first file and at the beginning of the second + * file) to +M (the opposite :). Finally, svn_diff__snake needs + * 1 extra slot on each side to work. 
+ */ + fp += length[1] + 1; + + sentinel_position[0].next = position_list1->next; + position_list1->next = &sentinel_position[0]; + sentinel_position[0].offset = position_list1->offset + 1; + token_counts[0] = token_counts_list1; + + sentinel_position[1].next = position_list2->next; + position_list2->next = &sentinel_position[1]; + sentinel_position[1].offset = position_list2->offset + 1; + token_counts[1] = token_counts_list2; + + /* Negative indices will not be used elsewhere + */ + sentinel_position[0].token_index = -1; + sentinel_position[1].token_index = -2; + + /* position d = M - N corresponds to the initial state, where + * we are at the beginning of both files. + */ + d = length[0] - length[1]; + + /* k = d - 1 will be the first to be used to get previous + * position information from, make sure it holds sane + * data + */ + fp[d - 1].position[0] = sentinel_position[0].next; + fp[d - 1].position[1] = &sentinel_position[1]; + + p = 0; + do + { + /* For k < 0, insertions are free */ + for (k = (d < 0 ? d : 0) - p; k < 0; k++) + { + svn_diff__snake(fp + k, token_counts, &lcs_freelist, pool); + } + /* for k > 0, deletions are free */ + for (k = (d > 0 ? 
d : 0) + p; k >= 0; k--) + { + svn_diff__snake(fp + k, token_counts, &lcs_freelist, pool); + } + + p++; + } + while (fp[0].position[1] != &sentinel_position[1]); + + if (suffix_lines) + lcs->next = prepend_lcs(fp[0].lcs, suffix_lines, + lcs->position[0]->offset - suffix_lines, + lcs->position[1]->offset - suffix_lines, + pool); + else + lcs->next = fp[0].lcs; + + lcs = svn_diff__lcs_reverse(lcs); + + position_list1->next = sentinel_position[0].next; + position_list2->next = sentinel_position[1].next; + + if (prefix_lines) + return prepend_lcs(lcs, prefix_lines, 1, 1, pool); + else + return lcs; +} diff --git a/subversion/libsvn_diff/parse-diff.c b/subversion/libsvn_diff/parse-diff.c new file mode 100644 index 0000000..a01b4d5 --- /dev/null +++ b/subversion/libsvn_diff/parse-diff.c @@ -0,0 +1,1373 @@ +/* + * parse-diff.c: functions for parsing diff files + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + +#include <stdlib.h> +#include <stddef.h> +#include <string.h> + +#include "svn_hash.h" +#include "svn_types.h" +#include "svn_error.h" +#include "svn_io.h" +#include "svn_pools.h" +#include "svn_props.h" +#include "svn_string.h" +#include "svn_utf.h" +#include "svn_dirent_uri.h" +#include "svn_diff.h" + +#include "private/svn_eol_private.h" +#include "private/svn_dep_compat.h" + +/* Helper macro for readability */ +#define starts_with(str, start) \ + (strncmp((str), (start), strlen(start)) == 0) + +/* Like strlen() but for string literals. */ +#define STRLEN_LITERAL(str) (sizeof(str) - 1) + +/* This struct describes a range within a file, as well as the + * current cursor position within the range. All numbers are in bytes. */ +struct svn_diff__hunk_range { + apr_off_t start; + apr_off_t end; + apr_off_t current; +}; + +struct svn_diff_hunk_t { + /* The patch this hunk belongs to. */ + svn_patch_t *patch; + + /* APR file handle to the patch file this hunk came from. */ + apr_file_t *apr_file; + + /* Ranges used to keep track of this hunk's texts positions within + * the patch file. */ + struct svn_diff__hunk_range diff_text_range; + struct svn_diff__hunk_range original_text_range; + struct svn_diff__hunk_range modified_text_range; + + /* Hunk ranges as they appeared in the patch file. + * All numbers are lines, not bytes. */ + svn_linenum_t original_start; + svn_linenum_t original_length; + svn_linenum_t modified_start; + svn_linenum_t modified_length; + + /* Number of lines of leading and trailing hunk context. 
*/ + svn_linenum_t leading_context; + svn_linenum_t trailing_context; +}; + +void +svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk) +{ + hunk->diff_text_range.current = hunk->diff_text_range.start; +} + +void +svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk) +{ + if (hunk->patch->reverse) + hunk->modified_text_range.current = hunk->modified_text_range.start; + else + hunk->original_text_range.current = hunk->original_text_range.start; +} + +void +svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk) +{ + if (hunk->patch->reverse) + hunk->original_text_range.current = hunk->original_text_range.start; + else + hunk->modified_text_range.current = hunk->modified_text_range.start; +} + +svn_linenum_t +svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk) +{ + return hunk->patch->reverse ? hunk->modified_start : hunk->original_start; +} + +svn_linenum_t +svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk) +{ + return hunk->patch->reverse ? hunk->modified_length : hunk->original_length; +} + +svn_linenum_t +svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk) +{ + return hunk->patch->reverse ? hunk->original_start : hunk->modified_start; +} + +svn_linenum_t +svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk) +{ + return hunk->patch->reverse ? hunk->original_length : hunk->modified_length; +} + +svn_linenum_t +svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk) +{ + return hunk->leading_context; +} + +svn_linenum_t +svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk) +{ + return hunk->trailing_context; +} + +/* Try to parse a positive number from a decimal number encoded + * in the string NUMBER. Return parsed number in OFFSET, and return + * TRUE if parsing was successful. 
*/ +static svn_boolean_t +parse_offset(svn_linenum_t *offset, const char *number) +{ + svn_error_t *err; + apr_uint64_t val; + + err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10); + if (err) + { + svn_error_clear(err); + return FALSE; + } + + *offset = (svn_linenum_t)val; + + return TRUE; +} + +/* Try to parse a hunk range specification from the string RANGE. + * Return parsed information in *START and *LENGTH, and return TRUE + * if the range parsed correctly. Note: This function may modify the + * input value RANGE. */ +static svn_boolean_t +parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range) +{ + char *comma; + + if (*range == 0) + return FALSE; + + comma = strstr(range, ","); + if (comma) + { + if (strlen(comma + 1) > 0) + { + /* Try to parse the length. */ + if (! parse_offset(length, comma + 1)) + return FALSE; + + /* Snip off the end of the string, + * so we can comfortably parse the line + * number the hunk starts at. */ + *comma = '\0'; + } + else + /* A comma but no length? */ + return FALSE; + } + else + { + *length = 1; + } + + /* Try to parse the line number the hunk starts at. */ + return parse_offset(start, range); +} + +/* Try to parse a hunk header in string HEADER, putting parsed information + * into HUNK. Return TRUE if the header parsed correctly. ATAT is the + * character string used to delimit the hunk header. + * Do all allocations in POOL. */ +static svn_boolean_t +parse_hunk_header(const char *header, svn_diff_hunk_t *hunk, + const char *atat, apr_pool_t *pool) +{ + const char *p; + const char *start; + svn_stringbuf_t *range; + + p = header + strlen(atat); + if (*p != ' ') + /* No. */ + return FALSE; + p++; + if (*p != '-') + /* Nah... */ + return FALSE; + /* OK, this may be worth allocating some memory for... */ + range = svn_stringbuf_create_ensure(31, pool); + start = ++p; + while (*p && *p != ' ') + { + p++; + } + + if (*p != ' ') + /* No no no... 
*/ + return FALSE; + + svn_stringbuf_appendbytes(range, start, p - start); + + /* Try to parse the first range. */ + if (! parse_range(&hunk->original_start, &hunk->original_length, range->data)) + return FALSE; + + /* Clear the stringbuf so we can reuse it for the second range. */ + svn_stringbuf_setempty(range); + p++; + if (*p != '+') + /* Eeek! */ + return FALSE; + /* OK, this may be worth copying... */ + start = ++p; + while (*p && *p != ' ') + { + p++; + } + if (*p != ' ') + /* No no no... */ + return FALSE; + + svn_stringbuf_appendbytes(range, start, p - start); + + /* Check for trailing @@ */ + p++; + if (! starts_with(p, atat)) + return FALSE; + + /* There may be stuff like C-function names after the trailing @@, + * but we ignore that. */ + + /* Try to parse the second range. */ + if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data)) + return FALSE; + + /* Hunk header is good. */ + return TRUE; +} + +/* Read a line of original or modified hunk text from the specified + * RANGE within FILE. FILE is expected to contain unidiff text. + * Leading unidiff symbols ('+', '-', and ' ') are removed from the line, + * Any lines commencing with the VERBOTEN character are discarded. + * VERBOTEN should be '+' or '-', depending on which form of hunk text + * is being read. + * + * All other parameters are as in svn_diff_hunk_readline_original_text() + * and svn_diff_hunk_readline_modified_text(). + */ +static svn_error_t * +hunk_readline_original_or_modified(apr_file_t *file, + struct svn_diff__hunk_range *range, + svn_stringbuf_t **stringbuf, + const char **eol, + svn_boolean_t *eof, + char verboten, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + apr_size_t max_len; + svn_boolean_t filtered; + apr_off_t pos; + svn_stringbuf_t *str; + + if (range->current >= range->end) + { + /* We're past the range. Indicate that no bytes can be read. 
*/ + *eof = TRUE; + if (eol) + *eol = NULL; + *stringbuf = svn_stringbuf_create_empty(result_pool); + return SVN_NO_ERROR; + } + + pos = 0; + SVN_ERR(svn_io_file_seek(file, APR_CUR, &pos, scratch_pool)); + SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool)); + do + { + max_len = range->end - range->current; + SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len, + result_pool, scratch_pool)); + range->current = 0; + SVN_ERR(svn_io_file_seek(file, APR_CUR, &range->current, scratch_pool)); + filtered = (str->data[0] == verboten || str->data[0] == '\\'); + } + while (filtered && ! *eof); + + if (filtered) + { + /* EOF, return an empty string. */ + *stringbuf = svn_stringbuf_create_ensure(0, result_pool); + } + else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ') + { + /* Shave off leading unidiff symbols. */ + *stringbuf = svn_stringbuf_create(str->data + 1, result_pool); + } + else + { + /* Return the line as-is. */ + *stringbuf = svn_stringbuf_dup(str, result_pool); + } + + SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool)); + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk, + svn_stringbuf_t **stringbuf, + const char **eol, + svn_boolean_t *eof, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + return svn_error_trace( + hunk_readline_original_or_modified(hunk->apr_file, + hunk->patch->reverse ? + &hunk->modified_text_range : + &hunk->original_text_range, + stringbuf, eol, eof, + hunk->patch->reverse ? '-' : '+', + result_pool, scratch_pool)); +} + +svn_error_t * +svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk, + svn_stringbuf_t **stringbuf, + const char **eol, + svn_boolean_t *eof, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + return svn_error_trace( + hunk_readline_original_or_modified(hunk->apr_file, + hunk->patch->reverse ? 
+ &hunk->original_text_range : + &hunk->modified_text_range, + stringbuf, eol, eof, + hunk->patch->reverse ? '+' : '-', + result_pool, scratch_pool)); +} + +svn_error_t * +svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk, + svn_stringbuf_t **stringbuf, + const char **eol, + svn_boolean_t *eof, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + svn_diff_hunk_t dummy; + svn_stringbuf_t *line; + apr_size_t max_len; + apr_off_t pos; + + if (hunk->diff_text_range.current >= hunk->diff_text_range.end) + { + /* We're past the range. Indicate that no bytes can be read. */ + *eof = TRUE; + if (eol) + *eol = NULL; + *stringbuf = svn_stringbuf_create_empty(result_pool); + return SVN_NO_ERROR; + } + + pos = 0; + SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, &pos, scratch_pool)); + SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, + &hunk->diff_text_range.current, scratch_pool)); + max_len = hunk->diff_text_range.end - hunk->diff_text_range.current; + SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len, + result_pool, + scratch_pool)); + hunk->diff_text_range.current = 0; + SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, + &hunk->diff_text_range.current, scratch_pool)); + SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool)); + + if (hunk->patch->reverse) + { + if (parse_hunk_header(line->data, &dummy, "@@", scratch_pool)) + { + /* Line is a hunk header, reverse it. */ + line = svn_stringbuf_createf(result_pool, + "@@ -%lu,%lu +%lu,%lu @@", + hunk->modified_start, + hunk->modified_length, + hunk->original_start, + hunk->original_length); + } + else if (parse_hunk_header(line->data, &dummy, "##", scratch_pool)) + { + /* Line is a hunk header, reverse it. 
*/ + line = svn_stringbuf_createf(result_pool, + "## -%lu,%lu +%lu,%lu ##", + hunk->modified_start, + hunk->modified_length, + hunk->original_start, + hunk->original_length); + } + else + { + if (line->data[0] == '+') + line->data[0] = '-'; + else if (line->data[0] == '-') + line->data[0] = '+'; + } + } + + *stringbuf = line; + + return SVN_NO_ERROR; +} + +/* Parse *PROP_NAME from HEADER as the part after the INDICATOR line. + * Allocate *PROP_NAME in RESULT_POOL. + * Set *PROP_NAME to NULL if no valid property name was found. */ +static svn_error_t * +parse_prop_name(const char **prop_name, const char *header, + const char *indicator, apr_pool_t *result_pool) +{ + SVN_ERR(svn_utf_cstring_to_utf8(prop_name, + header + strlen(indicator), + result_pool)); + if (**prop_name == '\0') + *prop_name = NULL; + else if (! svn_prop_name_is_valid(*prop_name)) + { + svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool); + svn_stringbuf_strip_whitespace(buf); + *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL); + } + + return SVN_NO_ERROR; +} + +/* Return the next *HUNK from a PATCH in APR_FILE. + * If no hunk can be found, set *HUNK to NULL. + * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK + * is the first belonging to a certain property, then PROP_NAME and + * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be + * NULL. If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be + * treated as context lines. Allocate results in RESULT_POOL. + * Use SCRATCH_POOL for all other allocations. 
*/ +static svn_error_t * +parse_next_hunk(svn_diff_hunk_t **hunk, + svn_boolean_t *is_property, + const char **prop_name, + svn_diff_operation_kind_t *prop_operation, + svn_patch_t *patch, + apr_file_t *apr_file, + svn_boolean_t ignore_whitespace, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + static const char * const minus = "--- "; + static const char * const text_atat = "@@"; + static const char * const prop_atat = "##"; + svn_stringbuf_t *line; + svn_boolean_t eof, in_hunk, hunk_seen; + apr_off_t pos, last_line; + apr_off_t start, end; + apr_off_t original_end; + apr_off_t modified_end; + svn_linenum_t original_lines; + svn_linenum_t modified_lines; + svn_linenum_t leading_context; + svn_linenum_t trailing_context; + svn_boolean_t changed_line_seen; + enum { + noise_line, + original_line, + modified_line, + context_line + } last_line_type; + apr_pool_t *iterpool; + + *prop_operation = svn_diff_op_unchanged; + + /* We only set this if we have a property hunk header. */ + *prop_name = NULL; + *is_property = FALSE; + + if (apr_file_eof(apr_file) == APR_EOF) + { + /* No more hunks here. */ + *hunk = NULL; + return SVN_NO_ERROR; + } + + in_hunk = FALSE; + hunk_seen = FALSE; + leading_context = 0; + trailing_context = 0; + changed_line_seen = FALSE; + original_end = 0; + modified_end = 0; + *hunk = apr_pcalloc(result_pool, sizeof(**hunk)); + + /* Get current seek position -- APR has no ftell() :( */ + pos = 0; + SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, scratch_pool)); + + /* Start out assuming noise. */ + last_line_type = noise_line; + + iterpool = svn_pool_create(scratch_pool); + do + { + + svn_pool_clear(iterpool); + + /* Remember the current line's offset, and read the line. */ + last_line = pos; + SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX, + iterpool, iterpool)); + + /* Update line offset for next iteration. 
*/ + pos = 0; + SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, iterpool)); + + /* Lines starting with a backslash indicate a missing EOL: + * "\ No newline at end of file" or "end of property". */ + if (line->data[0] == '\\') + { + if (in_hunk) + { + char eolbuf[2]; + apr_size_t len; + apr_off_t off; + apr_off_t hunk_text_end; + + /* Comment terminates the hunk text and says the hunk text + * has no trailing EOL. Snip off trailing EOL which is part + * of the patch file but not part of the hunk text. */ + off = last_line - 2; + SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool)); + len = sizeof(eolbuf); + SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len, + &eof, iterpool)); + if (eolbuf[0] == '\r' && eolbuf[1] == '\n') + hunk_text_end = last_line - 2; + else if (eolbuf[1] == '\n' || eolbuf[1] == '\r') + hunk_text_end = last_line - 1; + else + hunk_text_end = last_line; + + if (last_line_type == original_line && original_end == 0) + original_end = hunk_text_end; + else if (last_line_type == modified_line && modified_end == 0) + modified_end = hunk_text_end; + else if (last_line_type == context_line) + { + if (original_end == 0) + original_end = hunk_text_end; + if (modified_end == 0) + modified_end = hunk_text_end; + } + + SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool)); + } + + continue; + } + + if (in_hunk) + { + char c; + static const char add = '+'; + static const char del = '-'; + + if (! hunk_seen) + { + /* We're reading the first line of the hunk, so the start + * of the line just read is the hunk text's byte offset. */ + start = last_line; + } + + c = line->data[0]; + if (original_lines > 0 && modified_lines > 0 && + ((c == ' ') + /* Tolerate chopped leading spaces on empty lines. */ + || (! eof && line->len == 0) + /* Maybe tolerate chopped leading spaces on non-empty lines. */ + || (ignore_whitespace && c != del && c != add))) + { + /* It's a "context" line in the hunk. 
*/ + hunk_seen = TRUE; + original_lines--; + modified_lines--; + if (changed_line_seen) + trailing_context++; + else + leading_context++; + last_line_type = context_line; + } + else if (original_lines > 0 && c == del) + { + /* It's a "deleted" line in the hunk. */ + hunk_seen = TRUE; + changed_line_seen = TRUE; + + /* A hunk may have context in the middle. We only want + trailing lines of context. */ + if (trailing_context > 0) + trailing_context = 0; + + original_lines--; + last_line_type = original_line; + } + else if (modified_lines > 0 && c == add) + { + /* It's an "added" line in the hunk. */ + hunk_seen = TRUE; + changed_line_seen = TRUE; + + /* A hunk may have context in the middle. We only want + trailing lines of context. */ + if (trailing_context > 0) + trailing_context = 0; + + modified_lines--; + last_line_type = modified_line; + } + else + { + if (eof) + { + /* The hunk ends at EOF. */ + end = pos; + } + else + { + /* The start of the current line marks the first byte + * after the hunk text. */ + end = last_line; + } + + if (original_end == 0) + original_end = end; + if (modified_end == 0) + modified_end = end; + break; /* Hunk was empty or has been read. */ + } + } + else + { + if (starts_with(line->data, text_atat)) + { + /* Looks like we have a hunk header, try to rip it apart. */ + in_hunk = parse_hunk_header(line->data, *hunk, text_atat, + iterpool); + if (in_hunk) + { + original_lines = (*hunk)->original_length; + modified_lines = (*hunk)->modified_length; + *is_property = FALSE; + } + } + else if (starts_with(line->data, prop_atat)) + { + /* Looks like we have a property hunk header, try to rip it + * apart. 
*/ + in_hunk = parse_hunk_header(line->data, *hunk, prop_atat, + iterpool); + if (in_hunk) + { + original_lines = (*hunk)->original_length; + modified_lines = (*hunk)->modified_length; + *is_property = TRUE; + } + } + else if (starts_with(line->data, "Added: ")) + { + SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ", + result_pool)); + if (*prop_name) + *prop_operation = svn_diff_op_added; + } + else if (starts_with(line->data, "Deleted: ")) + { + SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ", + result_pool)); + if (*prop_name) + *prop_operation = svn_diff_op_deleted; + } + else if (starts_with(line->data, "Modified: ")) + { + SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ", + result_pool)); + if (*prop_name) + *prop_operation = svn_diff_op_modified; + } + else if (starts_with(line->data, minus) + || starts_with(line->data, "diff --git ")) + /* This could be a header of another patch. Bail out. */ + break; + } + } + /* Check for the line length since a file may not have a newline at the + * end and we depend upon the last line to be an empty one. */ + while (! eof || line->len > 0); + svn_pool_destroy(iterpool); + + if (! eof) + /* Rewind to the start of the line just read, so subsequent calls + * to this function or svn_diff_parse_next_patch() don't end + * up skipping the line -- it may contain a patch or hunk header. 
*/ + SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool)); + + if (hunk_seen && start < end) + { + (*hunk)->patch = patch; + (*hunk)->apr_file = apr_file; + (*hunk)->leading_context = leading_context; + (*hunk)->trailing_context = trailing_context; + (*hunk)->diff_text_range.start = start; + (*hunk)->diff_text_range.current = start; + (*hunk)->diff_text_range.end = end; + (*hunk)->original_text_range.start = start; + (*hunk)->original_text_range.current = start; + (*hunk)->original_text_range.end = original_end; + (*hunk)->modified_text_range.start = start; + (*hunk)->modified_text_range.current = start; + (*hunk)->modified_text_range.end = modified_end; + } + else + /* Something went wrong, just discard the result. */ + *hunk = NULL; + + return SVN_NO_ERROR; +} + +/* Compare function for sorting hunks after parsing. + * We sort hunks by their original line offset. */ +static int +compare_hunks(const void *a, const void *b) +{ + const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a); + const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b); + + if (ha->original_start < hb->original_start) + return -1; + if (ha->original_start > hb->original_start) + return 1; + return 0; +} + +/* Possible states of the diff header parser. */ +enum parse_state +{ + state_start, /* initial */ + state_git_diff_seen, /* diff --git */ + state_git_tree_seen, /* a tree operation, rather then content change */ + state_git_minus_seen, /* --- /dev/null; or --- a/ */ + state_git_plus_seen, /* +++ /dev/null; or +++ a/ */ + state_move_from_seen, /* rename from foo.c */ + state_copy_from_seen, /* copy from foo.c */ + state_minus_seen, /* --- foo.c */ + state_unidiff_found, /* valid start of a regular unidiff header */ + state_git_header_found /* valid start of a --git diff header */ +}; + +/* Data type describing a valid state transition of the parser. 
*/ +struct transition +{ + const char *expected_input; + enum parse_state required_state; + + /* A callback called upon each parser state transition. */ + svn_error_t *(*fn)(enum parse_state *new_state, char *input, + svn_patch_t *patch, apr_pool_t *result_pool, + apr_pool_t *scratch_pool); +}; + +/* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */ +static svn_error_t * +grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + const char *utf8_path; + const char *canon_path; + + /* Grab the filename and encode it in UTF-8. */ + /* TODO: Allow specifying the patch file's encoding. + * For now, we assume its encoding is native. */ + /* ### This can fail if the filename cannot be represented in the current + * ### locale's encoding. */ + SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path, + line, + scratch_pool)); + + /* Canonicalize the path name. */ + canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool); + + *file_name = apr_pstrdup(result_pool, canon_path); + + return SVN_NO_ERROR; +} + +/* Parse the '--- ' line of a regular unidiff. */ +static svn_error_t * +diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + /* If we can find a tab, it separates the filename from + * the rest of the line which we can discard. */ + char *tab = strchr(line, '\t'); + if (tab) + *tab = '\0'; + + SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "), + result_pool, scratch_pool)); + + *new_state = state_minus_seen; + + return SVN_NO_ERROR; +} + +/* Parse the '+++ ' line of a regular unidiff. */ +static svn_error_t * +diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + /* If we can find a tab, it separates the filename from + * the rest of the line which we can discard. 
*/ + char *tab = strchr(line, '\t'); + if (tab) + *tab = '\0'; + + SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "), + result_pool, scratch_pool)); + + *new_state = state_unidiff_found; + + return SVN_NO_ERROR; +} + +/* Parse the first line of a git extended unidiff. */ +static svn_error_t * +git_start(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + const char *old_path_start; + char *old_path_end; + const char *new_path_start; + const char *new_path_end; + char *new_path_marker; + const char *old_path_marker; + + /* ### Add handling of escaped paths + * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html: + * + * TAB, LF, double quote and backslash characters in pathnames are + * represented as \t, \n, \" and \\, respectively. If there is need for + * such substitution then the whole pathname is put in double quotes. + */ + + /* Our line should look like this: 'diff --git a/path b/path'. + * + * If we find any deviations from that format, we return with state reset + * to start. + */ + old_path_marker = strstr(line, " a/"); + + if (! old_path_marker) + { + *new_state = state_start; + return SVN_NO_ERROR; + } + + if (! *(old_path_marker + 3)) + { + *new_state = state_start; + return SVN_NO_ERROR; + } + + new_path_marker = strstr(old_path_marker, " b/"); + + if (! new_path_marker) + { + *new_state = state_start; + return SVN_NO_ERROR; + } + + if (! *(new_path_marker + 3)) + { + *new_state = state_start; + return SVN_NO_ERROR; + } + + /* By now, we know that we have a line on the form '--git diff a/.+ b/.+' + * We only need the filenames when we have deleted or added empty + * files. In those cases the old_path and new_path is identical on the + * 'diff --git' line. For all other cases we fetch the filenames from + * other header lines. 
*/ + old_path_start = line + STRLEN_LITERAL("diff --git a/"); + new_path_end = line + strlen(line); + new_path_start = old_path_start; + + while (TRUE) + { + ptrdiff_t len_old; + ptrdiff_t len_new; + + new_path_marker = strstr(new_path_start, " b/"); + + /* No new path marker, bail out. */ + if (! new_path_marker) + break; + + old_path_end = new_path_marker; + new_path_start = new_path_marker + STRLEN_LITERAL(" b/"); + + /* No path after the marker. */ + if (! *new_path_start) + break; + + len_old = old_path_end - old_path_start; + len_new = new_path_end - new_path_start; + + /* Are the paths before and after the " b/" marker the same? */ + if (len_old == len_new + && ! strncmp(old_path_start, new_path_start, len_old)) + { + *old_path_end = '\0'; + SVN_ERR(grab_filename(&patch->old_filename, old_path_start, + result_pool, scratch_pool)); + + SVN_ERR(grab_filename(&patch->new_filename, new_path_start, + result_pool, scratch_pool)); + break; + } + } + + /* We assume that the path is only modified until we've found a 'tree' + * header */ + patch->operation = svn_diff_op_modified; + + *new_state = state_git_diff_seen; + return SVN_NO_ERROR; +} + +/* Parse the '--- ' line of a git extended unidiff. */ +static svn_error_t * +git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + /* If we can find a tab, it separates the filename from + * the rest of the line which we can discard. */ + char *tab = strchr(line, '\t'); + if (tab) + *tab = '\0'; + + if (starts_with(line, "--- /dev/null")) + SVN_ERR(grab_filename(&patch->old_filename, "/dev/null", + result_pool, scratch_pool)); + else + SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"), + result_pool, scratch_pool)); + + *new_state = state_git_minus_seen; + return SVN_NO_ERROR; +} + +/* Parse the '+++ ' line of a git extended unidiff. 
*/ +static svn_error_t * +git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + /* If we can find a tab, it separates the filename from + * the rest of the line which we can discard. */ + char *tab = strchr(line, '\t'); + if (tab) + *tab = '\0'; + + if (starts_with(line, "+++ /dev/null")) + SVN_ERR(grab_filename(&patch->new_filename, "/dev/null", + result_pool, scratch_pool)); + else + SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"), + result_pool, scratch_pool)); + + *new_state = state_git_header_found; + return SVN_NO_ERROR; +} + +/* Parse the 'rename from ' line of a git extended unidiff. */ +static svn_error_t * +git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + SVN_ERR(grab_filename(&patch->old_filename, + line + STRLEN_LITERAL("rename from "), + result_pool, scratch_pool)); + + *new_state = state_move_from_seen; + return SVN_NO_ERROR; +} + +/* Parse the 'rename to ' line of a git extended unidiff. */ +static svn_error_t * +git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + SVN_ERR(grab_filename(&patch->new_filename, + line + STRLEN_LITERAL("rename to "), + result_pool, scratch_pool)); + + patch->operation = svn_diff_op_moved; + + *new_state = state_git_tree_seen; + return SVN_NO_ERROR; +} + +/* Parse the 'copy from ' line of a git extended unidiff. */ +static svn_error_t * +git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + SVN_ERR(grab_filename(&patch->old_filename, + line + STRLEN_LITERAL("copy from "), + result_pool, scratch_pool)); + + *new_state = state_copy_from_seen; + return SVN_NO_ERROR; +} + +/* Parse the 'copy to ' line of a git extended unidiff. 
*/ +static svn_error_t * +git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "), + result_pool, scratch_pool)); + + patch->operation = svn_diff_op_copied; + + *new_state = state_git_tree_seen; + return SVN_NO_ERROR; +} + +/* Parse the 'new file ' line of a git extended unidiff. */ +static svn_error_t * +git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + patch->operation = svn_diff_op_added; + + /* Filename already retrieved from diff --git header. */ + + *new_state = state_git_tree_seen; + return SVN_NO_ERROR; +} + +/* Parse the 'deleted file ' line of a git extended unidiff. */ +static svn_error_t * +git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + patch->operation = svn_diff_op_deleted; + + /* Filename already retrieved from diff --git header. */ + + *new_state = state_git_tree_seen; + return SVN_NO_ERROR; +} + +/* Add a HUNK associated with the property PROP_NAME to PATCH. */ +static svn_error_t * +add_property_hunk(svn_patch_t *patch, const char *prop_name, + svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation, + apr_pool_t *result_pool) +{ + svn_prop_patch_t *prop_patch; + + prop_patch = svn_hash_gets(patch->prop_patches, prop_name); + + if (! prop_patch) + { + prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t)); + prop_patch->name = prop_name; + prop_patch->operation = operation; + prop_patch->hunks = apr_array_make(result_pool, 1, + sizeof(svn_diff_hunk_t *)); + + svn_hash_sets(patch->prop_patches, prop_name, prop_patch); + } + + APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk; + + return SVN_NO_ERROR; +} + +struct svn_patch_file_t +{ + /* The APR file handle to the patch file. 
*/ + apr_file_t *apr_file; + + /* The file offset at which the next patch is expected. */ + apr_off_t next_patch_offset; +}; + +svn_error_t * +svn_diff_open_patch_file(svn_patch_file_t **patch_file, + const char *local_abspath, + apr_pool_t *result_pool) +{ + svn_patch_file_t *p; + + p = apr_palloc(result_pool, sizeof(*p)); + SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath, + APR_READ | APR_BUFFERED, APR_OS_DEFAULT, + result_pool)); + p->next_patch_offset = 0; + *patch_file = p; + + return SVN_NO_ERROR; +} + +/* Parse hunks from APR_FILE and store them in PATCH->HUNKS. + * Parsing stops if no valid next hunk can be found. + * If IGNORE_WHITESPACE is TRUE, lines without + * leading spaces will be treated as context lines. + * Allocate results in RESULT_POOL. + * Use SCRATCH_POOL for temporary allocations. */ +static svn_error_t * +parse_hunks(svn_patch_t *patch, apr_file_t *apr_file, + svn_boolean_t ignore_whitespace, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + svn_diff_hunk_t *hunk; + svn_boolean_t is_property; + const char *last_prop_name; + const char *prop_name; + svn_diff_operation_kind_t prop_operation; + apr_pool_t *iterpool; + + last_prop_name = NULL; + + patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *)); + patch->prop_patches = apr_hash_make(result_pool); + iterpool = svn_pool_create(scratch_pool); + do + { + svn_pool_clear(iterpool); + + SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation, + patch, apr_file, ignore_whitespace, result_pool, + iterpool)); + + if (hunk && is_property) + { + if (! prop_name) + prop_name = last_prop_name; + else + last_prop_name = prop_name; + SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation, + result_pool)); + } + else if (hunk) + { + APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk; + last_prop_name = NULL; + } + + } + while (hunk); + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + +/* State machine for the diff header parser. 
+ * Expected Input Required state Function to call */ +static struct transition transitions[] = +{ + {"--- ", state_start, diff_minus}, + {"+++ ", state_minus_seen, diff_plus}, + {"diff --git", state_start, git_start}, + {"--- a/", state_git_diff_seen, git_minus}, + {"--- a/", state_git_tree_seen, git_minus}, + {"--- /dev/null", state_git_tree_seen, git_minus}, + {"+++ b/", state_git_minus_seen, git_plus}, + {"+++ /dev/null", state_git_minus_seen, git_plus}, + {"rename from ", state_git_diff_seen, git_move_from}, + {"rename to ", state_move_from_seen, git_move_to}, + {"copy from ", state_git_diff_seen, git_copy_from}, + {"copy to ", state_copy_from_seen, git_copy_to}, + {"new file ", state_git_diff_seen, git_new_file}, + {"deleted file ", state_git_diff_seen, git_deleted_file}, +}; + +svn_error_t * +svn_diff_parse_next_patch(svn_patch_t **patch, + svn_patch_file_t *patch_file, + svn_boolean_t reverse, + svn_boolean_t ignore_whitespace, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + apr_off_t pos, last_line; + svn_boolean_t eof; + svn_boolean_t line_after_tree_header_read = FALSE; + apr_pool_t *iterpool; + enum parse_state state = state_start; + + if (apr_file_eof(patch_file->apr_file) == APR_EOF) + { + /* No more patches here. */ + *patch = NULL; + return SVN_NO_ERROR; + } + + *patch = apr_pcalloc(result_pool, sizeof(**patch)); + + pos = patch_file->next_patch_offset; + SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool)); + + iterpool = svn_pool_create(scratch_pool); + do + { + svn_stringbuf_t *line; + svn_boolean_t valid_header_line = FALSE; + int i; + + svn_pool_clear(iterpool); + + /* Remember the current line's offset, and read the line. */ + last_line = pos; + SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof, + APR_SIZE_MAX, iterpool, iterpool)); + + if (! eof) + { + /* Update line offset for next iteration. 
*/ + pos = 0; + SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, &pos, + iterpool)); + } + + /* Run the state machine. */ + for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++) + { + if (starts_with(line->data, transitions[i].expected_input) + && state == transitions[i].required_state) + { + SVN_ERR(transitions[i].fn(&state, line->data, *patch, + result_pool, iterpool)); + valid_header_line = TRUE; + break; + } + } + + if (state == state_unidiff_found || state == state_git_header_found) + { + /* We have a valid diff header, yay! */ + break; + } + else if (state == state_git_tree_seen && line_after_tree_header_read) + { + /* git patches can contain an index line after the file mode line */ + if (!starts_with(line->data, "index ")) + { + /* We have a valid diff header for a patch with only tree changes. + * Rewind to the start of the line just read, so subsequent calls + * to this function don't end up skipping the line -- it may + * contain a patch. */ + SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, + scratch_pool)); + break; + } + } + else if (state == state_git_tree_seen) + { + line_after_tree_header_read = TRUE; + } + else if (! valid_header_line && state != state_start + && !starts_with(line->data, "index ")) + { + /* We've encountered an invalid diff header. + * + * Rewind to the start of the line just read - it may be a new + * header that begins there. */ + SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, + scratch_pool)); + state = state_start; + } + + } + while (! eof); + + (*patch)->reverse = reverse; + if (reverse) + { + const char *temp; + temp = (*patch)->old_filename; + (*patch)->old_filename = (*patch)->new_filename; + (*patch)->new_filename = temp; + } + + if ((*patch)->old_filename == NULL || (*patch)->new_filename == NULL) + { + /* Something went wrong, just discard the result. 
*/ + *patch = NULL; + } + else + SVN_ERR(parse_hunks(*patch, patch_file->apr_file, ignore_whitespace, + result_pool, iterpool)); + + svn_pool_destroy(iterpool); + + patch_file->next_patch_offset = 0; + SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, + &patch_file->next_patch_offset, scratch_pool)); + + if (*patch) + { + /* Usually, hunks appear in the patch sorted by their original line + * offset. But just in case they weren't parsed in this order for + * some reason, we sort them so that our caller can assume that hunks + * are sorted as if parsed from a usual patch. */ + qsort((*patch)->hunks->elts, (*patch)->hunks->nelts, + (*patch)->hunks->elt_size, compare_hunks); + } + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_diff_close_patch_file(svn_patch_file_t *patch_file, + apr_pool_t *scratch_pool) +{ + return svn_error_trace(svn_io_file_close(patch_file->apr_file, + scratch_pool)); +} diff --git a/subversion/libsvn_diff/token.c b/subversion/libsvn_diff/token.c new file mode 100644 index 0000000..6388d9f --- /dev/null +++ b/subversion/libsvn_diff/token.c @@ -0,0 +1,198 @@ +/* + * token.c : routines for doing diffs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + + +#include <apr.h> +#include <apr_pools.h> +#include <apr_general.h> + +#include "svn_error.h" +#include "svn_diff.h" +#include "svn_types.h" + +#include "diff.h" + + +/* + * Prime number to use as the size of the hash table. This number was + * not selected by testing of any kind and may need tweaking. + */ +#define SVN_DIFF__HASH_SIZE 127 + +struct svn_diff__node_t +{ + svn_diff__node_t *parent; + svn_diff__node_t *left; + svn_diff__node_t *right; + + apr_uint32_t hash; + svn_diff__token_index_t index; + void *token; +}; + +struct svn_diff__tree_t +{ + svn_diff__node_t *root[SVN_DIFF__HASH_SIZE]; + apr_pool_t *pool; + svn_diff__token_index_t node_count; +}; + + +/* + * Returns number of tokens in a tree + */ +svn_diff__token_index_t +svn_diff__get_node_count(svn_diff__tree_t *tree) +{ + return tree->node_count; +} + +/* + * Support functions to build a tree of token positions + */ + +void +svn_diff__tree_create(svn_diff__tree_t **tree, apr_pool_t *pool) +{ + *tree = apr_pcalloc(pool, sizeof(**tree)); + (*tree)->pool = pool; + (*tree)->node_count = 0; +} + + +static svn_error_t * +tree_insert_token(svn_diff__node_t **node, svn_diff__tree_t *tree, + void *diff_baton, + const svn_diff_fns2_t *vtable, + apr_uint32_t hash, void *token) +{ + svn_diff__node_t *new_node; + svn_diff__node_t **node_ref; + svn_diff__node_t *parent; + int rv; + + SVN_ERR_ASSERT(token); + + parent = NULL; + node_ref = &tree->root[hash % SVN_DIFF__HASH_SIZE]; + + while (*node_ref != NULL) + { + parent = *node_ref; + + rv = hash - parent->hash; + if (!rv) + SVN_ERR(vtable->token_compare(diff_baton, parent->token, token, &rv)); + + if (rv == 0) + { + /* Discard the previous token. This helps in cases where + * only recently read tokens are still in memory. 
+ */ + if (vtable->token_discard != NULL) + vtable->token_discard(diff_baton, parent->token); + + parent->token = token; + *node = parent; + + return SVN_NO_ERROR; + } + else if (rv > 0) + { + node_ref = &parent->left; + } + else + { + node_ref = &parent->right; + } + } + + /* Create a new node */ + new_node = apr_palloc(tree->pool, sizeof(*new_node)); + new_node->parent = parent; + new_node->left = NULL; + new_node->right = NULL; + new_node->hash = hash; + new_node->token = token; + new_node->index = tree->node_count++; + + *node = *node_ref = new_node; + + return SVN_NO_ERROR; +} + + +/* + * Get all tokens from a datasource. Return the + * last item in the (circular) list. + */ +svn_error_t * +svn_diff__get_tokens(svn_diff__position_t **position_list, + svn_diff__tree_t *tree, + void *diff_baton, + const svn_diff_fns2_t *vtable, + svn_diff_datasource_e datasource, + apr_off_t prefix_lines, + apr_pool_t *pool) +{ + svn_diff__position_t *start_position; + svn_diff__position_t *position = NULL; + svn_diff__position_t **position_ref; + svn_diff__node_t *node; + void *token; + apr_off_t offset; + apr_uint32_t hash; + + *position_list = NULL; + + position_ref = &start_position; + offset = prefix_lines; + hash = 0; /* The callback fn doesn't need to touch it per se */ + while (1) + { + SVN_ERR(vtable->datasource_get_next_token(&hash, &token, + diff_baton, datasource)); + if (token == NULL) + break; + + offset++; + SVN_ERR(tree_insert_token(&node, tree, diff_baton, vtable, hash, token)); + + /* Create a new position */ + position = apr_palloc(pool, sizeof(*position)); + position->next = NULL; + position->token_index = node->index; + position->offset = offset; + + *position_ref = position; + position_ref = &position->next; + } + + *position_ref = start_position; + + SVN_ERR(vtable->datasource_close(diff_baton, datasource)); + + *position_list = position; + + return SVN_NO_ERROR; +} diff --git a/subversion/libsvn_diff/util.c b/subversion/libsvn_diff/util.c new file mode 
100644 index 0000000..9e1f411 --- /dev/null +++ b/subversion/libsvn_diff/util.c @@ -0,0 +1,591 @@ +/* + * util.c : routines for doing diffs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + + +#include <apr.h> +#include <apr_general.h> + +#include "svn_hash.h" +#include "svn_pools.h" +#include "svn_dirent_uri.h" +#include "svn_props.h" +#include "svn_mergeinfo.h" +#include "svn_error.h" +#include "svn_diff.h" +#include "svn_types.h" +#include "svn_ctype.h" +#include "svn_utf.h" +#include "svn_version.h" + +#include "private/svn_diff_private.h" +#include "diff.h" + +#include "svn_private_config.h" + + +svn_boolean_t +svn_diff_contains_conflicts(svn_diff_t *diff) +{ + while (diff != NULL) + { + if (diff->type == svn_diff__type_conflict) + { + return TRUE; + } + + diff = diff->next; + } + + return FALSE; +} + +svn_boolean_t +svn_diff_contains_diffs(svn_diff_t *diff) +{ + while (diff != NULL) + { + if (diff->type != svn_diff__type_common) + { + return TRUE; + } + + diff = diff->next; + } + + return FALSE; +} + +svn_error_t * +svn_diff_output(svn_diff_t *diff, + void *output_baton, + const svn_diff_output_fns_t *vtable) +{ + svn_error_t *(*output_fn)(void *, + apr_off_t, apr_off_t, + apr_off_t, apr_off_t, + apr_off_t, apr_off_t); + + while (diff != NULL) + { + switch (diff->type) + { + case svn_diff__type_common: + output_fn = vtable->output_common; + break; + + case svn_diff__type_diff_common: + output_fn = vtable->output_diff_common; + break; + + case svn_diff__type_diff_modified: + output_fn = vtable->output_diff_modified; + break; + + case svn_diff__type_diff_latest: + output_fn = vtable->output_diff_latest; + break; + + case svn_diff__type_conflict: + output_fn = NULL; + if (vtable->output_conflict != NULL) + { + SVN_ERR(vtable->output_conflict(output_baton, + diff->original_start, diff->original_length, + diff->modified_start, diff->modified_length, + diff->latest_start, diff->latest_length, + diff->resolved_diff)); + } + break; + + default: + output_fn = NULL; + break; + } + + if (output_fn != NULL) + { + SVN_ERR(output_fn(output_baton, + diff->original_start, 
diff->original_length, + diff->modified_start, diff->modified_length, + diff->latest_start, diff->latest_length)); + } + + diff = diff->next; + } + + return SVN_NO_ERROR; +} + + +void +svn_diff__normalize_buffer(char **tgt, + apr_off_t *lengthp, + svn_diff__normalize_state_t *statep, + const char *buf, + const svn_diff_file_options_t *opts) +{ + /* Variables for looping through BUF */ + const char *curp, *endp; + + /* Variable to record normalizing state */ + svn_diff__normalize_state_t state = *statep; + + /* Variables to track what needs copying into the target buffer */ + const char *start = buf; + apr_size_t include_len = 0; + svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */ + + /* Variable to record the state of the target buffer */ + char *tgt_newend = *tgt; + + /* If this is a noop, then just get out of here. */ + if (! opts->ignore_space && ! opts->ignore_eol_style) + { + *tgt = (char *)buf; + return; + } + + + /* It only took me forever to get this routine right, + so here my thoughts go: + + Below, we loop through the data, doing 2 things: + + - Normalizing + - Copying other data + + The routine tries its hardest *not* to copy data, but instead + returning a pointer into already normalized existing data. + + To this end, a block 'other data' shouldn't be copied when found, + but only as soon as it can't be returned in-place. + + On a character level, there are 3 possible operations: + + - Skip the character (don't include in the normalized data) + - Include the character (do include in the normalizad data) + - Include as another character + This is essentially the same as skipping the current character + and inserting a given character in the output data. + + The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to + handle the character based operations. The macros themselves + collect character level data into blocks. 
+ + At all times designate the START, INCLUDED_LEN and CURP pointers + an included and and skipped block like this: + + [ start, start + included_len ) [ start + included_len, curp ) + INCLUDED EXCLUDED + + When the routine flips from skipping to including, the last + included block has to be flushed to the output buffer. + */ + + /* Going from including to skipping; only schedules the current + included section for flushing. + Also, simply chop off the character if it's the first in the buffer, + so we can possibly just return the remainder of the buffer */ +#define SKIP \ + do { \ + if (start == curp) \ + ++start; \ + last_skipped = TRUE; \ + } while (0) + +#define INCLUDE \ + do { \ + if (last_skipped) \ + COPY_INCLUDED_SECTION; \ + ++include_len; \ + last_skipped = FALSE; \ + } while (0) + +#define COPY_INCLUDED_SECTION \ + do { \ + if (include_len > 0) \ + { \ + memmove(tgt_newend, start, include_len); \ + tgt_newend += include_len; \ + include_len = 0; \ + } \ + start = curp; \ + } while (0) + + /* Include the current character as character X. + If the current character already *is* X, add it to the + currently included region, increasing chances for consecutive + fully normalized blocks. 
*/ +#define INCLUDE_AS(x) \ + do { \ + if (*curp == (x)) \ + INCLUDE; \ + else \ + { \ + INSERT((x)); \ + SKIP; \ + } \ + } while (0) + + /* Insert character X in the output buffer */ +#define INSERT(x) \ + do { \ + COPY_INCLUDED_SECTION; \ + *tgt_newend++ = (x); \ + } while (0) + + for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp) + { + switch (*curp) + { + case '\r': + if (opts->ignore_eol_style) + INCLUDE_AS('\n'); + else + INCLUDE; + state = svn_diff__normalize_state_cr; + break; + + case '\n': + if (state == svn_diff__normalize_state_cr + && opts->ignore_eol_style) + SKIP; + else + INCLUDE; + state = svn_diff__normalize_state_normal; + break; + + default: + if (svn_ctype_isspace(*curp) + && opts->ignore_space != svn_diff_file_ignore_space_none) + { + /* Whitespace but not '\r' or '\n' */ + if (state != svn_diff__normalize_state_whitespace + && opts->ignore_space + == svn_diff_file_ignore_space_change) + /*### If we can postpone insertion of the space + until the next non-whitespace character, + we have a potential of reducing the number of copies: + If this space is followed by more spaces, + this will cause a block-copy. + If the next non-space block is considered normalized + *and* preceded by a space, we can take advantage of that. */ + /* Note, the above optimization applies to 90% of the source + lines in our own code, since it (generally) doesn't use + more than one space per blank section, except for the + beginning of a line. */ + INCLUDE_AS(' '); + else + SKIP; + state = svn_diff__normalize_state_whitespace; + } + else + { + /* Non-whitespace character, or whitespace character in + svn_diff_file_ignore_space_none mode. */ + INCLUDE; + state = svn_diff__normalize_state_normal; + } + } + } + + /* If we're not in whitespace, flush the last chunk of data. 
+ * Note that this will work correctly when this is the last chunk of the + * file: + * * If there is an eol, it will either have been output when we entered + * the state_cr, or it will be output now. + * * If there is no eol and we're not in whitespace, then we just output + * everything below. + * * If there's no eol and we are in whitespace, we want to ignore + * whitespace unconditionally. */ + + if (*tgt == tgt_newend) + { + /* we haven't copied any data in to *tgt and our chunk consists + only of one block of (already normalized) data. + Just return the block. */ + *tgt = (char *)start; + *lengthp = include_len; + } + else + { + COPY_INCLUDED_SECTION; + *lengthp = tgt_newend - *tgt; + } + + *statep = state; + +#undef SKIP +#undef INCLUDE +#undef INCLUDE_AS +#undef INSERT +#undef COPY_INCLUDED_SECTION +} + +svn_error_t * +svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf, + const char *header_encoding, + apr_pool_t *scratch_pool) +{ + const char *out_str; + + SVN_ERR(svn_utf_cstring_from_utf8_ex2( + &out_str, + APR_EOL_STR + SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR, + header_encoding, scratch_pool)); + svn_stringbuf_appendcstr(stringbuf, out_str); + return SVN_NO_ERROR; +} + +svn_error_t * +svn_diff__unified_write_hunk_header(svn_stream_t *output_stream, + const char *header_encoding, + const char *hunk_delimiter, + apr_off_t old_start, + apr_off_t old_length, + apr_off_t new_start, + apr_off_t new_length, + const char *hunk_extra_context, + apr_pool_t *scratch_pool) +{ + SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, + scratch_pool, + "%s -%" APR_OFF_T_FMT, + hunk_delimiter, old_start)); + /* If the hunk length is 1, suppress the number of lines in the hunk + * (it is 1 implicitly) */ + if (old_length != 1) + { + SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, + scratch_pool, + ",%" APR_OFF_T_FMT, old_length)); + } + + SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, + 
scratch_pool, + " +%" APR_OFF_T_FMT, new_start)); + if (new_length != 1) + { + SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, + scratch_pool, + ",%" APR_OFF_T_FMT, new_length)); + } + + if (hunk_extra_context == NULL) + hunk_extra_context = ""; + SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, + scratch_pool, + " %s%s%s" APR_EOL_STR, + hunk_delimiter, + hunk_extra_context[0] ? " " : "", + hunk_extra_context)); + return SVN_NO_ERROR; +} + +svn_error_t * +svn_diff__unidiff_write_header(svn_stream_t *output_stream, + const char *header_encoding, + const char *old_header, + const char *new_header, + apr_pool_t *scratch_pool) +{ + SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, + scratch_pool, + "--- %s" APR_EOL_STR + "+++ %s" APR_EOL_STR, + old_header, + new_header)); + return SVN_NO_ERROR; +} + +/* A helper function for display_prop_diffs. Output the differences between + the mergeinfo stored in ORIG_MERGEINFO_VAL and NEW_MERGEINFO_VAL in a + human-readable form to OUTSTREAM, using ENCODING. Use POOL for temporary + allocations. 
*/ +static svn_error_t * +display_mergeinfo_diff(const char *old_mergeinfo_val, + const char *new_mergeinfo_val, + const char *encoding, + svn_stream_t *outstream, + apr_pool_t *pool) +{ + apr_hash_t *old_mergeinfo_hash, *new_mergeinfo_hash, *added, *deleted; + apr_pool_t *iterpool = svn_pool_create(pool); + apr_hash_index_t *hi; + + if (old_mergeinfo_val) + SVN_ERR(svn_mergeinfo_parse(&old_mergeinfo_hash, old_mergeinfo_val, pool)); + else + old_mergeinfo_hash = NULL; + + if (new_mergeinfo_val) + SVN_ERR(svn_mergeinfo_parse(&new_mergeinfo_hash, new_mergeinfo_val, pool)); + else + new_mergeinfo_hash = NULL; + + SVN_ERR(svn_mergeinfo_diff2(&deleted, &added, old_mergeinfo_hash, + new_mergeinfo_hash, + TRUE, pool, pool)); + + for (hi = apr_hash_first(pool, deleted); + hi; hi = apr_hash_next(hi)) + { + const char *from_path = svn__apr_hash_index_key(hi); + svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi); + svn_string_t *merge_revstr; + + svn_pool_clear(iterpool); + SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray, + iterpool)); + + SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool, + _(" Reverse-merged %s:r%s%s"), + from_path, merge_revstr->data, + APR_EOL_STR)); + } + + for (hi = apr_hash_first(pool, added); + hi; hi = apr_hash_next(hi)) + { + const char *from_path = svn__apr_hash_index_key(hi); + svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi); + svn_string_t *merge_revstr; + + svn_pool_clear(iterpool); + SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray, + iterpool)); + + SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool, + _(" Merged %s:r%s%s"), + from_path, merge_revstr->data, + APR_EOL_STR)); + } + + svn_pool_destroy(iterpool); + return SVN_NO_ERROR; +} + +svn_error_t * +svn_diff__display_prop_diffs(svn_stream_t *outstream, + const char *encoding, + const apr_array_header_t *propchanges, + apr_hash_t *original_props, + svn_boolean_t pretty_print_mergeinfo, + apr_pool_t *pool) +{ + 
apr_pool_t *iterpool = svn_pool_create(pool); + int i; + + for (i = 0; i < propchanges->nelts; i++) + { + const char *action; + const svn_string_t *original_value; + const svn_prop_t *propchange + = &APR_ARRAY_IDX(propchanges, i, svn_prop_t); + + if (original_props) + original_value = svn_hash_gets(original_props, propchange->name); + else + original_value = NULL; + + /* If the property doesn't exist on either side, or if it exists + with the same value, skip it. This can happen if the client is + hitting an old mod_dav_svn server that doesn't understand the + "send-all" REPORT style. */ + if ((! (original_value || propchange->value)) + || (original_value && propchange->value + && svn_string_compare(original_value, propchange->value))) + continue; + + svn_pool_clear(iterpool); + + if (! original_value) + action = "Added"; + else if (! propchange->value) + action = "Deleted"; + else + action = "Modified"; + SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool, + "%s: %s%s", action, + propchange->name, APR_EOL_STR)); + + if (pretty_print_mergeinfo + && strcmp(propchange->name, SVN_PROP_MERGEINFO) == 0) + { + const char *orig = original_value ? original_value->data : NULL; + const char *val = propchange->value ? propchange->value->data : NULL; + svn_error_t *err = display_mergeinfo_diff(orig, val, encoding, + outstream, iterpool); + + /* Issue #3896: If we can't pretty-print mergeinfo differences + because invalid mergeinfo is present, then don't let the diff + fail, just print the diff as any other property. */ + if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR) + { + svn_error_clear(err); + } + else + { + SVN_ERR(err); + continue; + } + } + + { + svn_diff_t *diff; + svn_diff_file_options_t options = { 0 }; + const svn_string_t *orig + = original_value ? original_value + : svn_string_create_empty(iterpool); + const svn_string_t *val + = propchange->value ? 
propchange->value + : svn_string_create_empty(iterpool); + + SVN_ERR(svn_diff_mem_string_diff(&diff, orig, val, &options, + iterpool)); + + /* UNIX patch will try to apply a diff even if the diff header + * is missing. It tries to be helpful by asking the user for a + * target filename when it can't determine the target filename + * from the diff header. But there usually are no files which + * UNIX patch could apply the property diff to, so we use "##" + * instead of "@@" as the default hunk delimiter for property diffs. + * We also supress the diff header. */ + SVN_ERR(svn_diff_mem_string_output_unified2( + outstream, diff, FALSE /* no header */, "##", NULL, NULL, + encoding, orig, val, iterpool)); + } + } + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + + +/* Return the library version number. */ +const svn_version_t * +svn_diff_version(void) +{ + SVN_VERSION_BODY; +} |