diff options
Diffstat (limited to 'sys/contrib/octeon-sdk/cvmx-dfa.h')
-rw-r--r-- | sys/contrib/octeon-sdk/cvmx-dfa.h | 802 |
1 files changed, 802 insertions, 0 deletions
diff --git a/sys/contrib/octeon-sdk/cvmx-dfa.h b/sys/contrib/octeon-sdk/cvmx-dfa.h new file mode 100644 index 0000000..a2d1602 --- /dev/null +++ b/sys/contrib/octeon-sdk/cvmx-dfa.h @@ -0,0 +1,802 @@ +/***********************license start*************** + * Copyright (c) 2003-2010 Cavium Inc. (support@cavium.com). All rights + * reserved. + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + + * * Neither the name of Cavium Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + + * This Software, including technical data, may be subject to U.S. export control + * laws, including the U.S. Export Administration Act and its associated + * regulations, and may be subject to export or import regulations in other + * countries. + + * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" + * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR + * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO + * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR + * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM + * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE, + * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF + * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR + * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR + * PERFORMANCE OF THE SOFTWARE LIES WITH YOU. + ***********************license end**************************************/ + + + + + + + +/** + * @file + * + * Interface to the CN31XX, CN38XX, and CN58XX hardware DFA engine. + * + * <hr>$Revision: 70030 $<hr> + */ + +#ifndef __CVMX_DFA_H__ +#define __CVMX_DFA_H__ +#include "cvmx-llm.h" +#include "cvmx-wqe.h" +#include "cvmx-fpa.h" + +#include "executive-config.h" +#ifdef CVMX_ENABLE_DFA_FUNCTIONS +#include "cvmx-config.h" +#endif + +#define ENABLE_DEPRECATED /* Set to enable the old 18/36 bit names */ + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Maximum nodes available in a small encoding */ +#define CVMX_DFA_NODESM_MAX_NODES ((OCTEON_IS_MODEL(OCTEON_CN31XX)) ? 0x8000 : 0x20000) +#define CVMX_DFA_NODESM_SIZE 512 /* Size of each node for small encoding */ +#define CVMX_DFA_NODELG_SIZE 1024 /* Size of each node for large encoding */ +#define CVMX_DFA_NODESM_LAST_TERMINAL (CVMX_DFA_NODESM_MAX_NODES-1) + +#ifdef ENABLE_DEPRECATED +/* These defines are for compatability with old code. They are deprecated */ +#define CVMX_DFA_NODE18_SIZE CVMX_DFA_NODESM_SIZE +#define CVMX_DFA_NODE36_SIZE CVMX_DFA_NODELG_SIZE +#define CVMX_DFA_NODE18_MAX_NODES CVMX_DFA_NODESM_MAX_NODES +#define CVMX_DFA_NODE18_LAST_TERMINAL CVMX_DFA_NODESM_LAST_TERMINAL +#endif + +/** + * Which type of memory encoding is this graph using. Make sure you setup + * the LLM to match. + */ +typedef enum +{ + CVMX_DFA_GRAPH_TYPE_SM = 0, + CVMX_DFA_GRAPH_TYPE_LG = 1, +#ifdef ENABLE_DEPRECATED + CVMX_DFA_GRAPH_TYPE_18b = 0, /* Deprecated */ + CVMX_DFA_GRAPH_TYPE_36b = 1 /* Deprecated */ +#endif +} cvmx_dfa_graph_type_t; + +/** + * The possible node types. + */ +typedef enum +{ + CVMX_DFA_NODE_TYPE_NORMAL = 0, /**< Node is a branch */ + CVMX_DFA_NODE_TYPE_MARKED = 1, /**< Node is marked special */ + CVMX_DFA_NODE_TYPE_TERMINAL = 2 /**< Node is a terminal leaf */ +} cvmx_dfa_node_type_t; + +/** + * The possible reasons the DFA stopped processing. + */ +typedef enum +{ + CVMX_DFA_STOP_REASON_DATA_GONE = 0, /**< DFA ran out of data */ + CVMX_DFA_STOP_REASON_PARITY_ERROR = 1, /**< DFA encountered a memory error */ + CVMX_DFA_STOP_REASON_FULL = 2, /**< DFA is full */ + CVMX_DFA_STOP_REASON_TERMINAL = 3 /**< DFA hit a terminal */ +} cvmx_dfa_stop_reason_t; + +/** + * This format describes the DFA pointers in small mode + */ +typedef union +{ + uint64_t u64; + struct + { + uint64_t mbz :32;/**< Must be zero */ + uint64_t p1 : 1;/**< Set if next_node1 is odd parity */ + uint64_t next_node1 :15;/**< Next node if an odd character match */ + uint64_t p0 : 1;/**< Set if next_node0 is odd parity */ + uint64_t next_node0 :15;/**< Next node if an even character match */ + } w32; + struct + { + uint64_t mbz :28;/**< Must be zero */ + uint64_t p1 : 1;/**< Set if next_node1 is odd parity */ + uint64_t next_node1 :17;/**< Next node if an odd character match */ + uint64_t p0 : 1;/**< Set if next_node0 is odd parity */ + uint64_t next_node0 :17;/**< Next node if an even character match */ + } w36; + struct /**< @ this structure only applies starting in CN58XX and if DFA_CFG[NRPL_ENA] == 1 and IWORD0[NREPLEN] == 1. */ + { + uint64_t mbz :28;/**< Must be zero */ + uint64_t p1 : 1;/**< Set if next_node1 is odd parity */ + uint64_t per_node_repl1 : 1;/**< enable for extra replicaiton for next node (CN58XX) */ + uint64_t next_node_repl1 : 2;/**< extra replicaiton for next node (CN58XX) (if per_node_repl1 is set) */ + uint64_t next_node1 :14;/**< Next node if an odd character match - IWORD3[Msize], if per_node_repl1==1. */ + uint64_t p0 : 1;/**< Set if next_node0 is odd parity */ + uint64_t per_node_repl0 : 1;/**< enable for extra replicaiton for next node (CN58XX) */ + uint64_t next_node_repl0 : 2;/**< extra replicaiton for next node (CN58XX) (if per_node_repl0 is set) */ + uint64_t next_node0 :14;/**< Next node if an odd character match - IWORD3[Msize], if per_node_repl0==1. */ + } w36nrepl_en; /**< use when next_node_repl[01] is 1. */ + struct /**< this structure only applies starting in CN58XX and if DFA_CFG[NRPL_ENA] == 1 and IWORD0[NREPLEN] == 1. */ + { + uint64_t mbz :28;/**< Must be zero */ + uint64_t p1 : 1;/**< Set if next_node1 is odd parity */ + uint64_t per_node_repl1 : 1;/**< enable for extra replicaiton for next node (CN58XX) */ + uint64_t next_node1 :16;/**< Next node if an odd character match, if per_node_repl1==0. */ + uint64_t p0 : 1;/**< Set if next_node0 is odd parity */ + uint64_t per_node_repl0 : 1;/**< enable for extra replicaiton for next node (CN58XX) */ + uint64_t next_node0 :16;/**< Next node if an odd character match, if per_node_repl0==0. */ + } w36nrepl_dis; /**< use when next_node_repl[01] is 0. */ +#if defined(ENABLE_DEPRECATED) && !OCTEON_IS_COMMON_BINARY() +#if CVMX_COMPILED_FOR(OCTEON_CN31XX) + struct /**< @deprecated unnamed reference to members */ + { + uint64_t mbz :32;/**< Must be zero */ + uint64_t p1 : 1;/**< Set if next_node1 is odd parity */ + uint64_t next_node1 :15;/**< Next node if an odd character match */ + uint64_t p0 : 1;/**< Set if next_node0 is odd parity */ + uint64_t next_node0 :15;/**< Next node if an even character match */ + }; +#elif CVMX_COMPILED_FOR(OCTEON_CN38XX) + struct /**< @deprecated unnamed reference to members */ + { + uint64_t mbz :28;/**< Must be zero */ + uint64_t p1 : 1;/**< Set if next_node1 is odd parity */ + uint64_t next_node1 :17;/**< Next node if an odd character match */ + uint64_t p0 : 1;/**< Set if next_node0 is odd parity */ + uint64_t next_node0 :17;/**< Next node if an even character match */ + }; +#else + /* Other chips don't support the deprecated unnamed unions */ +#endif +#endif +} cvmx_dfa_node_next_sm_t; + +/** + * This format describes the DFA pointers in large mode + */ +typedef union +{ + uint64_t u64; + struct + { + uint64_t mbz :32;/**< Must be zero */ + uint64_t ecc : 7;/**< ECC checksum on the rest of the bits */ + cvmx_dfa_node_type_t type : 2;/**< Node type */ + uint64_t mbz2 : 3;/**< Must be zero */ + uint64_t next_node :20;/**< Next node */ + } w32; + struct + { + uint64_t mbz :28;/**< Must be zero */ + uint64_t ecc : 7;/**< ECC checksum on the rest of the bits */ + cvmx_dfa_node_type_t type : 2;/**< Node type */ + uint64_t extra_bits : 5;/**< bits copied to report (PASS3/CN58XX), Must be zero previously */ + uint64_t next_node_repl : 2;/**< extra replicaiton for next node (PASS3/CN58XX), Must be zero previously */ + uint64_t next_node :20;/**< Next node ID, Note, combine with next_node_repl to use as start_node + for continuation, as in cvmx_dfa_node_next_lgb_t. */ + } w36; +#if defined(ENABLE_DEPRECATED) && !OCTEON_IS_COMMON_BINARY() +#if CVMX_COMPILED_FOR(OCTEON_CN31XX) + struct /**< @deprecated unnamed reference to members */ + { + uint64_t mbz :32;/**< Must be zero */ + uint64_t ecc : 7;/**< ECC checksum on the rest of the bits */ + cvmx_dfa_node_type_t type : 2;/**< Node type */ + uint64_t mbz2 : 3;/**< Must be zero */ + uint64_t next_node :20;/**< Next node */ + }; +#elif CVMX_COMPILED_FOR(OCTEON_CN38XX) + struct /**< @deprecated unnamed reference to members */ + { + uint64_t mbz :28;/**< Must be zero */ + uint64_t ecc : 7;/**< ECC checksum on the rest of the bits */ + cvmx_dfa_node_type_t type : 2;/**< Node type */ + uint64_t extra_bits : 5;/**< bits copied to report (PASS3/CN58XX), Must be zero previously */ + uint64_t next_node_repl : 2;/**< extra replicaiton for next node (PASS3/CN58XX), Must be zero previously */ + uint64_t next_node :20;/**< Next node ID, Note, combine with next_node_repl to use as start_node + for continuation, as in cvmx_dfa_node_next_lgb_t. */ + }; +#else + /* Other chips don't support the deprecated unnamed unions */ +#endif +#endif +} cvmx_dfa_node_next_lg_t; + +/** + * This format describes the DFA pointers in large mode, another way + */ +typedef union +{ + uint64_t u64; + struct + { + uint64_t mbz :32;/**< Must be zero */ + uint64_t ecc : 7;/**< ECC checksum on the rest of the bits */ + uint64_t type_terminal : 1;/**< Node type */ + uint64_t type_marked : 1;/**< Node type */ + uint64_t mbz2 : 3;/**< Must be zero */ + uint64_t next_node :20;/**< Next node */ + } w32; + struct + { + uint64_t mbz :28;/**< Must be zero */ + uint64_t ecc : 7;/**< ECC checksum on the rest of the bits */ + uint64_t type_terminal : 1;/**< Node type */ + uint64_t type_marked : 1;/**< Node type */ + uint64_t extra_bits : 5;/**< bits copied to report (PASS3/CN58XX), Must be zero previously */ + uint64_t next_node_id_and_repl :22;/**< Next node ID (and repl for PASS3/CN58XX or repl=0 if not), + use this as start node for continuation. */ + } w36; +#if defined(ENABLE_DEPRECATED) && !OCTEON_IS_COMMON_BINARY() +#if CVMX_COMPILED_FOR(OCTEON_CN31XX) + struct /**< @deprecated unnamed reference to members */ + { + uint64_t mbz :32;/**< Must be zero */ + uint64_t ecc : 7;/**< ECC checksum on the rest of the bits */ + uint64_t type_terminal : 1;/**< Node type */ + uint64_t type_marked : 1;/**< Node type */ + uint64_t mbz2 : 3;/**< Must be zero */ + uint64_t next_node :20;/**< Next node */ + }; +#elif CVMX_COMPILED_FOR(OCTEON_CN38XX) + struct /**< @deprecated unnamed reference to members */ + { + uint64_t mbz :28;/**< Must be zero */ + uint64_t ecc : 7;/**< ECC checksum on the rest of the bits */ + uint64_t type_terminal : 1;/**< Node type */ + uint64_t type_marked : 1;/**< Node type */ + uint64_t extra_bits : 5;/**< bits copied to report (PASS3/CN58XX), Must be zero previously */ + uint64_t next_node_id_and_repl :22;/**< Next node ID (and repl for PASS3/CN58XX or repl=0 if not), + use this as start node for continuation. */ + }; +#else + /* Other chips don't support the deprecated unnamed unions */ +#endif +#endif +} cvmx_dfa_node_next_lgb_t; + +/** + * This format describes the DFA pointers in large mode + */ +typedef union +{ + uint64_t u64; + struct + { + uint64_t mbz :27;/**< Must be zero */ + uint64_t x0 : 1;/**< XOR of the rest of the bits */ + uint64_t reserved : 4;/**< Must be zero */ + uint64_t data :32;/**< LLM Data */ + } w32; + struct + { + uint64_t mbz :27;/**< Must be zero */ + uint64_t x0 : 1;/**< XOR of the rest of the bits */ + uint64_t data :36;/**< LLM Data */ + } w36; +#if defined(ENABLE_DEPRECATED) && !OCTEON_IS_COMMON_BINARY() +#if CVMX_COMPILED_FOR(OCTEON_CN31XX) + struct /**< @deprecated unnamed reference to members */ + { + uint64_t mbz :27;/**< Must be zero */ + uint64_t x0 : 1;/**< XOR of the rest of the bits */ + uint64_t reserved : 4;/**< Must be zero */ + uint64_t data :32;/**< LLM Data */ + }; +#elif CVMX_COMPILED_FOR(OCTEON_CN38XX) + struct /**< @deprecated unnamed reference to members */ + { + uint64_t mbz :27;/**< Must be zero */ + uint64_t x0 : 1;/**< XOR of the rest of the bits */ + uint64_t data :36;/**< LLM Data */ + }; +#else + /* Other chips don't support the deprecated unnamed unions */ +#endif +#endif +} cvmx_dfa_node_next_read_t; + +/** + * This structure defines the data format in the low-latency memory + */ +typedef union +{ + uint64_t u64; + cvmx_dfa_node_next_sm_t sm; /**< This format describes the DFA pointers in small mode */ + cvmx_dfa_node_next_lg_t lg; /**< This format describes the DFA pointers in large mode */ + cvmx_dfa_node_next_lgb_t lgb; /**< This format describes the DFA pointers in large mode, another way */ + cvmx_dfa_node_next_read_t read; /**< This format describes the DFA pointers in large mode */ +#ifdef ENABLE_DEPRECATED + cvmx_dfa_node_next_sm_t s18; /**< Deprecated */ + cvmx_dfa_node_next_lg_t s36; /**< Deprecated */ + cvmx_dfa_node_next_lgb_t s36b; /**< Deprecated */ +#endif +} cvmx_dfa_node_next_t; + +/** + * These structures define a DFA instruction + */ +typedef union +{ + uint64_t u64[4]; + uint32_t u32; + struct + { + // WORD 0 + uint64_t gxor : 8; /**< Graph XOR value (PASS3/CN58XX), Must be zero for other chips + or if DFA_CFG[GXOR_ENA] == 0. */ + uint64_t nxoren : 1; /**< Node XOR enable (PASS3/CN58XX), Must be zero for other chips + or if DFA_CFG[NXOR_ENA] == 0. */ + uint64_t nreplen : 1; /**< Node Replication mode enable (PASS3/CN58XX), Must be zero for other chips + or if DFA_CFG[NRPL_ENA] == 0 or IWORD0[Ty] == 0. */ +#if 0 + uint64_t snrepl : 2; /**< Start_Node Replication (PASS3/CN58XX), Must be zero for other chips + or if DFA_CFG[NRPL_ENA] == 0 or IWORD0[Ty] == 0 or IWORD0[NREPLEN] == 0. */ + uint64_t start_node_id : 20; /**< Node to start the walk from */ +#else + uint64_t start_node : 22; /**< Node to start the walk from, includes ID and snrepl, see notes above. */ +#endif + + uint64_t unused02 : 2; /**< Must be zero */ + cvmx_llm_replication_t replication : 2; /**< Type of memory replication to use */ + uint64_t unused03 : 3; /**< Must be zero */ + cvmx_dfa_graph_type_t type : 1; /**< Type of graph */ + uint64_t unused04 : 4; /**< Must be zero */ + uint64_t base : 20; /**< All tables start on 1KB boundary */ + + // WORD 1 + uint64_t input_length : 16; /**< In bytes, # pointers in gather case */ + uint64_t use_gather : 1; /**< Set to use gather */ + uint64_t no_L2_alloc : 1; /**< Set to disable loading of the L2 cache by the DFA */ + uint64_t full_block_write : 1; /**< If set, HW can write entire cache blocks @ result_ptr */ + uint64_t little_endian : 1; /**< Affects only packet data, not instruction, gather list, or result */ + uint64_t unused1 : 8; /**< Must be zero */ + uint64_t data_ptr : 36; /**< Either directly points to data or the gather list. If gather list, + data_ptr<2:0> must be zero (i.e. 8B aligned) */ + // WORD 2 + uint64_t max_results : 16; /**< in 64-bit quantities, mbz for store */ + uint64_t unused2 : 12; /**< Must be zero */ + uint64_t result_ptr : 36; /**< must be 128 byte aligned */ + + // WORD 3 + uint64_t tsize : 8; /**< tsize*256 is the number of terminal nodes for GRAPH_TYPE_SM */ + uint64_t msize : 16; /**< msize is the number of marked nodes for GRAPH_TYPE_SM */ + uint64_t unused3 : 4; /**< Must be zero */ + uint64_t wq_ptr : 36; /**< 0 for no work queue entry creation */ + } s; +} cvmx_dfa_command_t; + +/** + * Format of the first result word written by the hardware. + */ +typedef union +{ + uint64_t u64; + struct + { + cvmx_dfa_stop_reason_t reas : 2;/**< Reason the DFA stopped */ + uint64_t mbz :44;/**< Zero */ + uint64_t last_marked : 1;/**< Set if the last entry written is marked */ + uint64_t done : 1;/**< Set to 1 when the DFA completes */ + uint64_t num_entries :16;/**< Number of result words written */ + } s; +} cvmx_dfa_result0_t; + +/** + * Format of the second result word and subsequent result words written by the hardware. + */ +typedef union +{ + uint64_t u64; + struct + { + uint64_t byte_offset : 16; /**< Number of bytes consumed */ + uint64_t extra_bits_high: 4; /**< If PASS3 or CN58XX and DFA_CFG[NRPL_ENA] == 1 and IWORD0[Ty] == 1, + then set to <27:24> of the last next-node pointer. Else set to 0x0. */ + uint64_t prev_node : 20; /**< Index of the previous node */ + uint64_t extra_bits_low : 2; /**< If PASS3 or CN58XX and DFA_CFG[NRPL_ENA] == 1 and IWORD0[Ty] == 1, + then set to <23:22> of the last next-node pointer. Else set to 0x0. */ + uint64_t next_node_repl : 2; /**< If PASS3 or CN58XX and DFA_CFG[NRPL_ENA] == 1 and IWORD0[Ty] == 1, then set + to next_node_repl (<21:20>) of the last next-node pointer. Else set to 0x0. */ + uint64_t current_node : 20; /**< Index of the current node */ + } s; + struct + { + uint64_t byte_offset : 16; /**< Number of bytes consumed */ + uint64_t extra_bits_high: 4; /**< If PASS3 or CN58XX and DFA_CFG[NRPL_ENA] == 1 and IWORD0[Ty] == 1, + then set to <27:24> of the last next-node pointer. Else set to 0x0. */ + uint64_t prev_node : 20; /**< Index of the previous node */ + uint64_t extra_bits_low : 2; /**< If PASS3 or CN58XX and DFA_CFG[NRPL_ENA] == 1 and IWORD0[Ty] == 1, + then set to <23:22> of the last next-node pointer. Else set to 0x0. */ + uint64_t curr_id_and_repl:22; /**< Use ths as start_node for continuation. */ + } s2; +} cvmx_dfa_result1_t; + +/** + * Abstract DFA graph + */ +typedef struct +{ + cvmx_llm_replication_t replication; /**< Level of memory replication to use. Must match the LLM setup */ + cvmx_dfa_graph_type_t type; /**< Type of graph */ + uint64_t base_address; /**< LLM start address of the graph */ + union { + struct { + uint64_t gxor : 8; /**< Graph XOR value (PASS3/CN58XX), Must be zero for other chips + or if DFA_CFG[GXOR_ENA] == 0. */ + uint64_t nxoren : 1; /**< Node XOR enable (PASS3/CN58XX), Must be zero for other chips + or if DFA_CFG[NXOR_ENA] == 0. */ + uint64_t nreplen : 1; /**< Node Replication mode enable (PASS3/CN58XX), Must be zero for other chips + or if DFA_CFG[NRPL_ENA] == 0 or IWORD0[Ty] == 0. */ + uint64_t snrepl : 2; /**< Start_Node Replication (PASS3/CN58XX), Must be zero for other chips + or if DFA_CFG[NRPL_ENA] == 0 or IWORD0[Ty] == 0 or IWORD0[NREPLEN] == 0.*/ + uint64_t start_node_id : 20; /**< Start node index for the root of the graph */ + }; + uint32_t start_node; /**< Start node index for the root of the graph, incl. snrepl (PASS3/CN58XX) + NOTE: for backwards compatibility this name includes the the + gxor, nxoren, nreplen, and snrepl fields which will all be + zero in applicaitons existing before the introduction of these + fields, so that existing applicaiton do not need to change. */ + }; + int num_terminal_nodes; /**< Number of terminal nodes in the graph. Only needed for small graphs. */ + int num_marked_nodes; /**< Number of marked nodes in the graph. Only needed for small graphs. */ +} cvmx_dfa_graph_t; + +/** + * DFA internal global state -- stored in 8 bytes of FAU + */ +typedef union +{ + uint64_t u64; + struct { +#define CVMX_DFA_STATE_TICKET_BIT_POS 16 +#ifdef __BIG_ENDIAN_BITFIELD + // NOTE: must clear LSB of base_address_div16 due to ticket overflow + uint32_t base_address_div16; /**< Current DFA instruction queue chunck base address/16 (clear LSB). */ + uint8_t ticket_loops; /**< bits [15:8] of total number of tickets requested. */ + uint8_t ticket; /**< bits [7:0] of total number of tickets requested (current ticket held). */ + // NOTE: index and now_serving are written together + uint8_t now_serving; /**< current ticket being served (or ready to be served). */ + uint8_t index; /**< index into current chunk: (base_address_div16*16)[index] = next entry. */ +#else // NOTE: little endian mode probably won't work + uint8_t index; + uint8_t now_serving; + uint8_t ticket; + uint8_t ticket_loops; + uint32_t base_address_div16; +#endif + } s; + struct { // a bitfield version of the same thing to extract base address while clearing carry. +#ifdef __BIG_ENDIAN_BITFIELD + uint64_t base_address_div32 : 31; /**< Current DFA instruction queue chunck base address/32. */ + uint64_t carry : 1; /**< Carry out from total_tickets. */ + uint64_t total_tickets : 16; /**< Total tickets. */ + uint64_t now_serving : 8 ; /**< current ticket being served (or ready to be served). */ + uint64_t index : 8 ; /**< index into current chunk. */ +#else // NOTE: little endian mode probably won't work + uint64_t index : 8 ; + uint64_t now_serving : 8 ; + uint64_t total_tickets : 16; + uint64_t carry : 1; + uint64_t base_address_div32 : 31; +#endif + } s2; +} cvmx_dfa_state_t; + +/* CSR typedefs have been moved to cvmx-dfa-defs.h */ + +/** + * Write a small node edge to LLM. + * + * @param graph Graph to modify + * @param source_node + * Source node for this edge + * @param match_index + * Index into the node edge table. This is the match character/2. + * @param destination_node0 + * Destination if the character matches (match_index*2). + * @param destination_node1 + * Destination if the character matches (match_index*2+1). + */ +static inline void cvmx_dfa_write_edge_sm(const cvmx_dfa_graph_t *graph, + uint64_t source_node, uint64_t match_index, + uint64_t destination_node0, uint64_t destination_node1) +{ + cvmx_llm_address_t address; + cvmx_dfa_node_next_t next_ptr; + + address.u64 = graph->base_address + source_node * CVMX_DFA_NODESM_SIZE + match_index * 4; + + next_ptr.u64 = 0; + if (OCTEON_IS_MODEL(OCTEON_CN31XX)) + { + next_ptr.sm.w32.next_node0 = destination_node0; + next_ptr.sm.w32.p0 = cvmx_llm_parity(destination_node0); + + next_ptr.sm.w32.next_node1 = destination_node1; + next_ptr.sm.w32.p1 = cvmx_llm_parity(destination_node1); + } + else + { + next_ptr.sm.w36.next_node0 = destination_node0; + next_ptr.sm.w36.p0 = cvmx_llm_parity(destination_node0); + + next_ptr.sm.w36.next_node1 = destination_node1; + next_ptr.sm.w36.p1 = cvmx_llm_parity(destination_node1); + } + + cvmx_llm_write36(address, next_ptr.u64, 0); +} +#ifdef ENABLE_DEPRECATED +#define cvmx_dfa_write_edge18 cvmx_dfa_write_edge_sm +#endif + + +/** + * Write a large node edge to LLM. + * + * @param graph Graph to modify + * @param source_node + * Source node for this edge + * @param match Character to match before taking this edge. + * @param destination_node + * Destination node of the edge. + * @param destination_type + * Node type at the end of this edge. + */ +static inline void cvmx_dfa_write_node_lg(const cvmx_dfa_graph_t *graph, + uint64_t source_node, unsigned char match, + uint64_t destination_node, cvmx_dfa_node_type_t destination_type) +{ + cvmx_llm_address_t address; + cvmx_dfa_node_next_t next_ptr; + + address.u64 = graph->base_address + source_node * CVMX_DFA_NODELG_SIZE + (uint64_t)match * 4; + + next_ptr.u64 = 0; + if (OCTEON_IS_MODEL(OCTEON_CN31XX)) + { + next_ptr.lg.w32.type = destination_type; + next_ptr.lg.w32.next_node = destination_node; + next_ptr.lg.w32.ecc = cvmx_llm_ecc(next_ptr.u64); + } + else + { + next_ptr.lg.w36.type = destination_type; + next_ptr.lg.w36.next_node = destination_node; + next_ptr.lg.w36.ecc = cvmx_llm_ecc(next_ptr.u64); + } + + cvmx_llm_write36(address, next_ptr.u64, 0); +} +#ifdef ENABLE_DEPRECATED +#define cvmx_dfa_write_node36 cvmx_dfa_write_node_lg +#endif + +/** + * Ring the DFA doorbell telling it that new commands are + * available. + * + * @param num_commands + * Number of new commands + */ +static inline void cvmx_dfa_write_doorbell(uint64_t num_commands) +{ + CVMX_SYNCWS; + cvmx_write_csr(CVMX_DFA_DBELL, num_commands); +} + +/** + * @INTERNAL + * Write a new command to the DFA. Calls to this function + * are internally synchronized across all processors, and + * the doorbell is rung during this function. + * + * @param command Command to write + */ + +#ifdef CVMX_ENABLE_DFA_FUNCTIONS +static inline void __cvmx_dfa_write_command(cvmx_dfa_command_t *command) +{ + cvmx_dfa_state_t cvmx_dfa_state; + uint64_t my_ticket; // needs to wrap to 8 bits + uint64_t index; + cvmx_dfa_command_t *head; + + CVMX_PREFETCH0(command); + // take a ticket. + cvmx_dfa_state.u64 = cvmx_fau_fetch_and_add64(CVMX_FAU_DFA_STATE, 1ull<<CVMX_DFA_STATE_TICKET_BIT_POS); + my_ticket = cvmx_dfa_state.s.ticket; + + // see if it is our turn + while (my_ticket != cvmx_dfa_state.s.now_serving) { + int delta = my_ticket - cvmx_dfa_state.s.now_serving; + if (delta < 0) delta += 256; + cvmx_wait(10*delta); // reduce polling load on system + cvmx_dfa_state.u64 = cvmx_fau_fetch_and_add64(CVMX_FAU_DFA_STATE, 0); // poll for my_ticket==now_serving + } + + // compute index and instruction queue head pointer + index = cvmx_dfa_state.s.index; + + // NOTE: the DFA only supports 36-bit addressing + head = &((CASTPTR(cvmx_dfa_command_t, (cvmx_dfa_state.s2.base_address_div32 * 32ull))[index])); + head = (cvmx_dfa_command_t*)cvmx_phys_to_ptr(CAST64(head)); // NOTE: since we are not storing bit 63 of address, we must set it now + + // copy the command to the instruction queue + *head++ = *command; + + // check if a new chunk is needed + if (cvmx_unlikely((++index >= ((CVMX_FPA_DFA_POOL_SIZE-8)/sizeof(cvmx_dfa_command_t))))) { + uint64_t *new_base = (uint64_t*)cvmx_fpa_alloc(CVMX_FPA_DFA_POOL); // could make this async + if (new_base) { + // put the link into the instruction queue's "Next Chunk Buffer Ptr" + *(uint64_t *)head = cvmx_ptr_to_phys(new_base); + // update our state (note 32-bit write to not disturb other fields) + cvmx_fau_atomic_write32((cvmx_fau_reg_32_t)(CVMX_FAU_DFA_STATE + (CAST64(&cvmx_dfa_state.s.base_address_div16)-CAST64(&cvmx_dfa_state))), + (CAST64(new_base))/16); + } + else { + cvmx_dprintf("__cvmx_dfa_write_command: Out of memory. Expect crashes.\n"); + } + index=0; + } + + cvmx_dfa_write_doorbell(1); + + // update index and now_serving in the DFA state FAU location (NOTE: this write16 updates to 8-bit values.) + // NOTE: my_ticket+1 carry out is lost due to write16 and index has already been wrapped to fit in uint8. + cvmx_fau_atomic_write16((cvmx_fau_reg_16_t)(CVMX_FAU_DFA_STATE+(CAST64(&cvmx_dfa_state.s.now_serving) - CAST64(&cvmx_dfa_state))), + ((my_ticket+1)<<8) | index); +} + + +/** + * Submit work to the DFA units for processing + * + * @param graph Graph to process + * @param start_node + * The node to start (or continue) walking from + * includes. start_node_id and snrepl (PASS3/CN58XX), but gxor, + * nxoren, and nreplen are taken from the graph structure + * @param input The input to match against + * @param input_length + * The length of the input in bytes + * @param use_gather + * The input and input_length are of a gather list + * @param is_little_endian + * Set to 1 if the input is in little endian format and must + * be swapped before compare. + * @param result Location the DFA should put the results in. This must be + * an area sized in multiples of a cache line. + * @param max_results + * The maximum number of 64-bit result1 words after result0. + * That is, "size of the result area in 64-bit words" - 1. + * max_results must be at least 1. + * @param work Work queue entry to submit when DFA completes. Can be NULL. + */ +static inline void cvmx_dfa_submit(const cvmx_dfa_graph_t *graph, int start_node, + void *input, int input_length, int use_gather, int is_little_endian, + cvmx_dfa_result0_t *result, int max_results, cvmx_wqe_t *work) +{ + cvmx_dfa_command_t command; + + /* Make sure the result's first 64bit word is zero so we can tell when the + DFA is done. */ + result->u64 = 0; + + // WORD 0 + command.u64[0] = 0; + command.s.gxor = graph->gxor; // (PASS3/CN58XX) + command.s.nxoren = graph->nxoren; // (PASS3/CN58XX) + command.s.nreplen = graph->nreplen; // (PASS3/CN58XX) + command.s.start_node = start_node; // includes snrepl (PASS3/CN58XX) + command.s.replication = graph->replication; + command.s.type = graph->type; + command.s.base = graph->base_address>>10; + + // WORD 1 + command.u64[1] = 0; + command.s.input_length = input_length; + command.s.use_gather = use_gather; + command.s.no_L2_alloc = 0; + command.s.full_block_write = 1; + command.s.little_endian = is_little_endian; + command.s.data_ptr = cvmx_ptr_to_phys(input); + + // WORD 2 + command.u64[2] = 0; + command.s.max_results = max_results; + command.s.result_ptr = cvmx_ptr_to_phys(result); + + // WORD 3 + command.u64[3] = 0; + if (graph->type == CVMX_DFA_GRAPH_TYPE_SM) + { + command.s.tsize = (graph->num_terminal_nodes + 255) / 256; + command.s.msize = graph->num_marked_nodes; + } + command.s.wq_ptr = cvmx_ptr_to_phys(work); + + __cvmx_dfa_write_command(&command); // NOTE: this does synchronization and rings doorbell +} +#endif + +/** + * DFA gather list element + */ +typedef struct { + uint64_t length : 16; /**< length of piece of data at addr */ + uint64_t reserved : 12; /**< reserved, set to 0 */ + uint64_t addr : 36; /**< pointer to piece of data */ +} cvmx_dfa_gather_entry_t; + + +/** + * Check if a DFA has completed processing + * + * @param result_ptr Result area the DFA is using + * @return Non zero if the DFA is done + */ +static inline uint64_t cvmx_dfa_is_done(cvmx_dfa_result0_t *result_ptr) +{ + /* DFA sets the first result 64bit word to non zero when it's done */ + return ((volatile cvmx_dfa_result0_t *)result_ptr)->s.done; +} + + +#ifdef CVMX_ENABLE_DFA_FUNCTIONS +/** + * Initialize the DFA hardware before use + * Returns 0 on success, -1 on failure + */ +int cvmx_dfa_initialize(void); + + +/** + * Shutdown and cleanup resources used by the DFA + */ +void cvmx_dfa_shutdown(void); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __CVMX_DFA_H__ */ |